From a9baded7794bfc89b0368cd1967091eca666a594 Mon Sep 17 00:00:00 2001 From: Lucy Qiu Date: Mon, 15 Sep 2025 17:10:45 -0700 Subject: [PATCH] Add minimum_length to extended header for BC (#14320) Summary: https://github.com/pytorch/executorch/pull/14320 An error occurs when we have older PTE files with extended header size 24. When we call 'from_bytes', we expect header size 32 after adding the segment_data_size field in D81938296. This is BC on the C++ side because we have a minimum length. Add a minimum length to Python to make the change BC. Reviewed By: JacobSzwejbka, hyxu2006 Differential Revision: D82492169 --- exir/_serialize/_program.py | 36 +++++++++++++++++------- exir/_serialize/test/test_program.py | 41 ++++++++++++++++++++++++---- schema/extended_header.h | 3 +- 3 files changed, 64 insertions(+), 16 deletions(-) diff --git a/exir/_serialize/_program.py b/exir/_serialize/_program.py index 448a3afb90c..35a452c22ed 100644 --- a/exir/_serialize/_program.py +++ b/exir/_serialize/_program.py @@ -136,8 +136,7 @@ class _ExtendedHeader: # The magic bytes that should be at the beginning of the header. EXPECTED_MAGIC: ClassVar[bytes] = b"eh00" - # The length of the header in bytes. - EXPECTED_LENGTH: ClassVar[int] = ( + MINIMUM_LENGTH: ClassVar[int] = ( # Header magic 4 # Header length @@ -146,10 +145,19 @@ class _ExtendedHeader: + 8 # Segment base offset + 8 + ) + # The length of the header in bytes. + EXPECTED_LENGTH: ClassVar[int] = ( + MINIMUM_LENGTH # Segment data size + 8 ) + # To find the header, callers should provide at least this many bytes of + # the head of the serialized Program data. Keep this in sync with + # kNumHeadBytes in //executorch/schema/extended_header.h + NUM_HEAD_BYTES: ClassVar[int] = 64 + # Instance attributes. @dataclass will turn these into ctor args. # The size of the serialized program data in bytes. 
@@ -187,21 +195,29 @@ def from_bytes(data: bytes) -> "_ExtendedHeader": + f"< {_ExtendedHeader.EXPECTED_LENGTH}" ) + magic = data[0:4] + length = int.from_bytes(data[4:8], byteorder=_HEADER_BYTEORDER) + program_size = int.from_bytes(data[8:16], byteorder=_HEADER_BYTEORDER) + segment_base_offset = int.from_bytes(data[16:24], byteorder=_HEADER_BYTEORDER) + segment_data_size = ( + int.from_bytes(data[24:32], byteorder=_HEADER_BYTEORDER) + if length > _ExtendedHeader.MINIMUM_LENGTH + else 0 + ) + return _ExtendedHeader( - magic=data[0:4], - length=int.from_bytes(data[4:8], byteorder=_HEADER_BYTEORDER), - program_size=int.from_bytes(data[8:16], byteorder=_HEADER_BYTEORDER), - segment_base_offset=int.from_bytes( - data[16:24], byteorder=_HEADER_BYTEORDER - ), - segment_data_size=int.from_bytes(data[24:32], byteorder=_HEADER_BYTEORDER), + magic=magic, + length=length, + program_size=program_size, + segment_base_offset=segment_base_offset, + segment_data_size=segment_data_size, ) def is_valid(self) -> bool: """Returns true if the extended header appears to be well-formed.""" return ( self.magic == _ExtendedHeader.EXPECTED_MAGIC - and self.length >= _ExtendedHeader.EXPECTED_LENGTH + and self.length >= _ExtendedHeader.MINIMUM_LENGTH ) def to_bytes(self) -> bytes: diff --git a/exir/_serialize/test/test_program.py b/exir/_serialize/test/test_program.py index 18803da05b6..7ed83569169 100644 --- a/exir/_serialize/test/test_program.py +++ b/exir/_serialize/test/test_program.py @@ -1009,7 +1009,7 @@ def test_named_data_segments(self) -> None: EXAMPLE_HEADER_DATA: bytes = ( # Magic bytes b"eh00" - # uint32_t header size (little endian) + # uint32_t header size (little endian). 0x20 --> 32 bytes. 
+ b"\x20\x00\x00\x00" # uint64_t program size + b"\x44\x33\x44\x33\x22\x11\x22\x11" @@ -1017,6 +1017,22 @@ def test_named_data_segments(self) -> None: + b"\x88\x77\x88\x77\x66\x55\x66\x55" # uint64_t segment data size + b"\x22\x33\x22\x33\x44\x55\x44\x55" + # Padding; provide at least NUM_HEAD_BYTES for the header. + + b"\x99" * (_ExtendedHeader.NUM_HEAD_BYTES - 32) +) + +# Minimum fields in an extended header (no segment data size). +EXAMPLE_HEADER_DATA_MIN: bytes = ( + # Magic bytes + b"eh00" + # uint32_t header size (little endian). 0x18 --> 24 bytes. + + b"\x18\x00\x00\x00" + # uint64_t program size + + b"\x44\x33\x44\x33\x22\x11\x22\x11" + # uint64_t segment base offset + + b"\x88\x77\x88\x77\x66\x55\x66\x55" + # Padding; provide at least NUM_HEAD_BYTES for the header. + + b"\x99" * (_ExtendedHeader.NUM_HEAD_BYTES - 24) ) @@ -1028,7 +1044,7 @@ def test_to_bytes(self) -> None: segment_data_size=EXAMPLE_SEGMENT_DATA_SIZE, ) self.assertTrue(eh.is_valid()) - self.assertEqual(eh.to_bytes(), EXAMPLE_HEADER_DATA) + self.assertEqual(eh.to_bytes(), EXAMPLE_HEADER_DATA[0:32]) def test_to_bytes_with_non_defaults(self) -> None: eh = _ExtendedHeader( @@ -1045,11 +1061,11 @@ def test_to_bytes_with_non_defaults(self) -> None: # But still produces a valid output header, since to_bytes() ignores # magic and length. - self.assertEqual(eh.to_bytes(), EXAMPLE_HEADER_DATA) + self.assertEqual(eh.to_bytes(), EXAMPLE_HEADER_DATA[0:32]) def test_from_bytes_valid(self) -> None: # Parse the serialized extended header. - eh = _ExtendedHeader.from_bytes(EXAMPLE_HEADER_DATA) + eh = _ExtendedHeader.from_bytes(EXAMPLE_HEADER_DATA[0:32]) # This is a valid header: good magic and length. 
self.assertTrue(eh.is_valid()) @@ -1060,6 +1076,20 @@ def test_from_bytes_valid(self) -> None: self.assertEqual(eh.segment_base_offset, EXAMPLE_SEGMENT_BASE_OFFSET) self.assertEqual(eh.segment_data_size, EXAMPLE_SEGMENT_DATA_SIZE) + def test_from_bytes_minimum(self) -> None: + # Parse the serialized extended header. + eh = _ExtendedHeader.from_bytes(EXAMPLE_HEADER_DATA_MIN) + + # This is a valid header: good magic and length. + self.assertTrue(eh.is_valid()) + + self.assertEqual(eh.magic, _ExtendedHeader.EXPECTED_MAGIC) + self.assertEqual(eh.length, _ExtendedHeader.MINIMUM_LENGTH) + self.assertEqual(eh.program_size, EXAMPLE_PROGRAM_SIZE) + self.assertEqual(eh.segment_base_offset, EXAMPLE_SEGMENT_BASE_OFFSET) + # Does not contain segment_data_size; should be 0 + self.assertEqual(eh.segment_data_size, 0) + def test_from_bytes_with_more_data_than_necessary(self) -> None: # Pass in more data than necessary to parse the header. header_data_with_suffix = EXAMPLE_HEADER_DATA + b"\x55" * 16 @@ -1167,4 +1197,5 @@ def test_from_bytes_invalid_length(self) -> None: self.assertEqual(eh.length, 16) self.assertEqual(eh.program_size, EXAMPLE_PROGRAM_SIZE) self.assertEqual(eh.segment_base_offset, EXAMPLE_SEGMENT_BASE_OFFSET) - self.assertEqual(eh.segment_data_size, EXAMPLE_SEGMENT_DATA_SIZE) + # Length cut short; segment_data_size parsed as 0. + self.assertEqual(eh.segment_data_size, 0) diff --git a/schema/extended_header.h b/schema/extended_header.h index 7b37dc3df49..c90483808a1 100644 --- a/schema/extended_header.h +++ b/schema/extended_header.h @@ -22,7 +22,8 @@ namespace runtime { struct ExtendedHeader { /** * To find the header, callers should provide at least this many bytes of the - * head of the serialized Program data. + * head of the serialized Program data. Keep this in sync with NUM_HEAD_BYTES + * in //executorch/exir/_serialize/_program.py */ static constexpr size_t kNumHeadBytes = 64;