From a86a975210e5de6958d6c6998e0dd6af93e568b1 Mon Sep 17 00:00:00 2001 From: Jack Zhang Date: Tue, 3 Sep 2024 11:13:55 -0700 Subject: [PATCH 1/3] Add logs for helping debug address space overflow issue --- exir/emit/_emitter.py | 16 +++++++++++++++- exir/passes/sym_shape_eval_pass.py | 4 ++-- exir/tensor.py | 5 ++++- 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/exir/emit/_emitter.py b/exir/emit/_emitter.py index 2d2cc0f3f18..f5152082c4d 100644 --- a/exir/emit/_emitter.py +++ b/exir/emit/_emitter.py @@ -79,6 +79,7 @@ TensorShapeDynamism, ) from executorch.exir.tensor import ( + AddressSpaceOverflowException, layout_enum, make_allocation_info, make_tensor_value, @@ -349,7 +350,20 @@ def _tensor_spec_to_evalue(self, spec: TensorSpec) -> EValue: self.node, f"Non-const tensor should be an activation tensor: mem_offset {spec.mem_offset}", ) - allocation_info = make_allocation_info(spec.mem_id, spec.mem_offset) + try: + allocation_info = make_allocation_info(spec.mem_id, spec.mem_offset) + except AddressSpaceOverflowException as e: + raise InternalError( + self._emit_node_specific_error( + self.node, + ( + f"{e}\nHint: If you are using a memory pass based on dynamic shape bounds, " + f"such as ConstraintBasedSymShapeEvalPass, this may be the cause of an " + f"unbacked SymInt with its upper bound lazily set to 2^64-1 (uint64 max) " + "during torch.export()." + ) + ) + ) if spec.const: # Tensor with a blob we need to serialize. May not actually be constant at runtime diff --git a/exir/passes/sym_shape_eval_pass.py b/exir/passes/sym_shape_eval_pass.py index f4d11ed8143..84834cddacd 100644 --- a/exir/passes/sym_shape_eval_pass.py +++ b/exir/passes/sym_shape_eval_pass.py @@ -181,7 +181,7 @@ class HintBasedSymShapeEvalPass(PassBase): Warning: if you're using torch.export with constrain API, this method doesn't respect the input constraints. - Not inherit from ExportPass since we simply need a way to iterate thru + Not inherited from ExportPass since we simply need a way to iterate thru every node's output. PassBase is easier for that purpose. """ @@ -245,7 +245,7 @@ class ConstraintBasedSymShapeEvalPass(PassBase): formula. We should convert those symbolic formula to concrete value for static/upperbound tensors so we can properly do memory planning for them. - Not inherit from ExportPass since we simply need a way to iterate thru + Not inherited from ExportPass since we simply need a way to iterate through every node's output. PassBase is easier for that purpose. """ diff --git a/exir/tensor.py b/exir/tensor.py index 7380a96ebc7..2665585139d 100644 --- a/exir/tensor.py +++ b/exir/tensor.py @@ -21,6 +21,9 @@ from executorch.exir.schema import ScalarType, TensorShapeDynamism from executorch.exir.sym_util import eval_shape +class AddressSpaceOverflowException(Exception): + pass + def num_bytes_from_shape_and_dtype(shape: torch.Size, dtype: torch.dtype) -> int: """ @@ -297,7 +300,7 @@ def make_allocation_info(mem_id: int, mem_offset: int) -> schema.AllocationDetai memory_offset_low = mem_offset & ((1 << 32) - 1) memory_offset_high = mem_offset >> 32 if memory_offset_high >= 1 << 32: - raise ValueError(f"mem_offset {mem_offset} does not fit in 64 bits") + raise AddressSpaceOverflowException(f"mem_offset {mem_offset} does not fit in 64 bits") allocation_info = schema.AllocationDetails( memory_id=mem_id, From 88901508fa5965d54e19ec790206d7251f71589b Mon Sep 17 00:00:00 2001 From: Jack Zhang Date: Tue, 3 Sep 2024 12:40:24 -0700 Subject: [PATCH 2/3] Lint --- exir/emit/_emitter.py | 3 +-- exir/tensor.py | 5 ++++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/exir/emit/_emitter.py b/exir/emit/_emitter.py index f5152082c4d..dea9cf6fd6a 100644 --- a/exir/emit/_emitter.py +++ b/exir/emit/_emitter.py @@ -361,7 +361,7 @@ def _tensor_spec_to_evalue(self, spec: TensorSpec) -> EValue: f"such as ConstraintBasedSymShapeEvalPass, this may be the cause of an " f"unbacked SymInt with its upper bound lazily set to 2^64-1 (uint64 max) " "during torch.export()." - ) + ), ) ) @@ -1541,7 +1541,6 @@ def placeholder( is_user_input = True if isinstance(target, str) and isinstance(spec, TensorSpec): - fqn, is_mutable_buffer = self._find_fqn_for_placeholder(target, spec) # From the fqn find the corresponding tensor diff --git a/exir/tensor.py b/exir/tensor.py index 2665585139d..d63ed5d2627 100644 --- a/exir/tensor.py +++ b/exir/tensor.py @@ -21,6 +21,7 @@ from executorch.exir.schema import ScalarType, TensorShapeDynamism from executorch.exir.sym_util import eval_shape + class AddressSpaceOverflowException(Exception): pass @@ -300,7 +301,9 @@ def make_allocation_info(mem_id: int, mem_offset: int) -> schema.AllocationDetai memory_offset_low = mem_offset & ((1 << 32) - 1) memory_offset_high = mem_offset >> 32 if memory_offset_high >= 1 << 32: - raise AddressSpaceOverflowException(f"mem_offset {mem_offset} does not fit in 64 bits") + raise AddressSpaceOverflowException( + f"mem_offset {mem_offset} does not fit in 64 bits" + ) allocation_info = schema.AllocationDetails( memory_id=mem_id, From b10de682c630faf875cbda16e21bdb3e07250aed Mon Sep 17 00:00:00 2001 From: Jack Zhang Date: Tue, 3 Sep 2024 14:29:36 -0700 Subject: [PATCH 3/3] Fix test --- exir/tests/test_tensor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exir/tests/test_tensor.py b/exir/tests/test_tensor.py index a5d197a85b7..c5383b0dac2 100644 --- a/exir/tests/test_tensor.py +++ b/exir/tests/test_tensor.py @@ -171,7 +171,7 @@ def test_allocation_info_fails(self) -> None: ) for test_case in test_cases: kwargs = test_case[0] - with self.assertRaisesRegex(ValueError, test_case[1], msg=f"{kwargs}"): + with self.assertRaisesRegex(Exception, test_case[1], msg=f"{kwargs}"): make_allocation_info(**kwargs) def test_contiguous_stride_from_shape(self) -> None: