From 0bbd026f7db3123b5038cdd8af9175e6b114c77d Mon Sep 17 00:00:00 2001
From: lucylq
Date: Wed, 16 Oct 2024 10:41:51 -0700
Subject: [PATCH] remove pad-max-tiles from preprocess

---
 .../preprocess/export_preprocess.py           |  8 +--
 .../preprocess/export_preprocess_lib.py       |  2 -
 .../preprocess/test_preprocess.py             | 51 +++----------------
 3 files changed, 12 insertions(+), 49 deletions(-)

diff --git a/examples/models/llama3_2_vision/preprocess/export_preprocess.py b/examples/models/llama3_2_vision/preprocess/export_preprocess.py
index a7d944a5b8d..58c79095074 100644
--- a/examples/models/llama3_2_vision/preprocess/export_preprocess.py
+++ b/examples/models/llama3_2_vision/preprocess/export_preprocess.py
@@ -13,17 +13,17 @@
 
 
 def main():
-    # Export
-    ep = export_preprocess()
 
     # ExecuTorch
-    et = lower_to_executorch_preprocess(ep)
+    ep_et = export_preprocess()
+    et = lower_to_executorch_preprocess(ep_et)
     with open("preprocess_et.pte", "wb") as file:
         et.write_to_file(file)
 
     # AOTInductor
+    ep_aoti = export_preprocess()
     torch._inductor.aot_compile(
-        ep.module(),
+        ep_aoti.module(),
         get_example_inputs(),
         options={"aot_inductor.output_path": "preprocess_aoti.so"},
     )

diff --git a/examples/models/llama3_2_vision/preprocess/export_preprocess_lib.py b/examples/models/llama3_2_vision/preprocess/export_preprocess_lib.py
index 53bb2e400d0..f3fe8188c04 100644
--- a/examples/models/llama3_2_vision/preprocess/export_preprocess_lib.py
+++ b/examples/models/llama3_2_vision/preprocess/export_preprocess_lib.py
@@ -43,7 +43,6 @@ def export_preprocess(
     max_num_tiles: int = 4,
     tile_size: int = 224,
     antialias: bool = False,
-    pad_max_tiles: bool = True,
 ) -> ExportedProgram:
 
     # Instantiate eager model.
@@ -54,7 +53,6 @@
         max_num_tiles=max_num_tiles,
         tile_size=tile_size,
         antialias=antialias,
-        pad_max_tiles=pad_max_tiles,
     )
 
     # Replace non-exportable ops with custom ops.
diff --git a/examples/models/llama3_2_vision/preprocess/test_preprocess.py b/examples/models/llama3_2_vision/preprocess/test_preprocess.py
index 313097020a1..73a3fd29607 100644
--- a/examples/models/llama3_2_vision/preprocess/test_preprocess.py
+++ b/examples/models/llama3_2_vision/preprocess/test_preprocess.py
@@ -15,6 +15,11 @@
 from executorch.extension.pybindings import portable_lib  # noqa # usort: skip
 from executorch.extension.llm.custom_ops import sdpa_with_kv_cache  # noqa # usort: skip
 
+from executorch.examples.models.llama3_2_vision.preprocess.export_preprocess_lib import (
+    export_preprocess,
+    get_example_inputs,
+    lower_to_executorch_preprocess,
+)
 from executorch.extension.pybindings.portable_lib import (
     _load_for_executorch_from_buffer,
 )
@@ -37,12 +42,6 @@
 )
 from torchvision.transforms.v2 import functional as F
 
-from .export_preprocess_lib import (
-    export_preprocess,
-    get_example_inputs,
-    lower_to_executorch_preprocess,
-)
-
 
 @dataclass
 class PreprocessConfig:
@@ -54,7 +53,6 @@ class PreprocessConfig:
     tile_size: int = 224
     max_num_tiles: int = 4
     possible_resolutions = None
-    pad_max_tiles: bool = True
 
 
 class TestImageTransform(unittest.TestCase):
@@ -137,17 +135,6 @@ def prepare_inputs(
                 [1.0, 1.0],  # expected_tile_max
                 [0.0, 0.0],  # expected_tile_min
                 [1, 2],  # expected_aspect_ratio
-                False,  # pad_max_tiles
-            ),
-            (
-                (100, 400, 3),  # image_size
-                torch.Size([4, 3, 224, 224]),  # expected shape
-                False,  # resize_to_max_canvas
-                [0.2230, 0.1763, 0.0, 0.0],  # expected_tile_means
-                [1.0, 1.0, 0.0, 0.0],  # expected_tile_max
-                [0.0, 0.0, 0.0, 0.0],  # expected_tile_min
-                [1, 2],  # expected_aspect_ratio
-                True,  # pad_max_tiles
             ),
             (
                 (1000, 300, 3),  # image_size
@@ -157,7 +144,6 @@
                 [0.9976, 0.9940, 0.9936, 0.9906],  # expected_tile_max
                 [0.0037, 0.0047, 0.0039, 0.0],  # expected_tile_min
                 [4, 1],  # expected_aspect_ratio
-                False,  # pad_max_tiles
             ),
             (
                 (200, 200, 3),  # image_size
@@ -167,7 +153,6 @@
                 [0.9921, 0.9925, 0.9969, 0.9908],  # expected_tile_max
                 [0.0056, 0.0069, 0.0059, 0.0032],  # expected_tile_min
                 [2, 2],  # expected_aspect_ratio
-                False,  # pad_max_tiles
             ),
             (
                 (600, 200, 3),  # image_size
@@ -177,17 +162,6 @@
                 [1.0, 1.0, 1.0],  # expected_tile_max
                 [0.0, 0.0, 0.0],  # expected_tile_min
                 [3, 1],  # expected_aspect_ratio
-                False,  # pad_max_tiles
-            ),
-            (
-                (600, 200, 3),  # image_size
-                torch.Size([4, 3, 224, 224]),  # expected shape
-                False,  # resize_to_max_canvas
-                [0.4472, 0.4468, 0.3031, 0.0],  # expected_tile_means
-                [1.0, 1.0, 1.0, 0.0],  # expected_tile_max
-                [0.0, 0.0, 0.0, 0.0],  # expected_tile_min
-                [3, 1],  # expected_aspect_ratio
-                True,  # pad_max_tiles
             ),
         ]
     )
@@ -200,11 +174,8 @@ def test_preprocess(
         image_size: Tuple[int],
         expected_shape: torch.Size,
         resize_to_max_canvas: bool,
         expected_tile_means: List[float],
         expected_tile_max: List[float],
         expected_tile_min: List[float],
         expected_ar: List[int],
-        pad_max_tiles: bool,
     ) -> None:
-        config = PreprocessConfig(
-            resize_to_max_canvas=resize_to_max_canvas, pad_max_tiles=pad_max_tiles
-        )
+        config = PreprocessConfig(resize_to_max_canvas=resize_to_max_canvas)
 
         reference_model = CLIPImageTransform(
             image_mean=config.image_mean,
             image_std=config.image_std,
             resize_to_max_canvas=config.resize_to_max_canvas,
             resample=config.resample,
             antialias=config.antialias,
             tile_size=config.tile_size,
             max_num_tiles=config.max_num_tiles,
             possible_resolutions=None,
-            pad_max_tiles=config.pad_max_tiles,
         )
 
         eager_model = _CLIPImageTransform(
             image_mean=config.image_mean,
             image_std=config.image_std,
             resize_to_max_canvas=config.resize_to_max_canvas,
             antialias=config.antialias,
             tile_size=config.tile_size,
             max_num_tiles=config.max_num_tiles,
-            pad_max_tiles=config.pad_max_tiles,
         )
 
         exported_model = export_preprocess(
             image_mean=config.image_mean,
             image_std=config.image_std,
             resize_to_max_canvas=config.resize_to_max_canvas,
             antialias=config.antialias,
             tile_size=config.tile_size,
             max_num_tiles=config.max_num_tiles,
-            pad_max_tiles=config.pad_max_tiles,
         )
 
         executorch_model = lower_to_executorch_preprocess(exported_model)
@@ -275,11 +243,8 @@ def test_preprocess(
             self.assertAlmostEqual(tile.min().item(), expected_tile_min[i], delta=1e-4)
 
         # Check num tiles matches the product of the aspect ratio.
-        if pad_max_tiles:
-            self.assertEqual(config.max_num_tiles, reference_image.shape[0])
-        else:
-            expected_num_tiles = reference_ar[0] * reference_ar[1]
-            self.assertEqual(expected_num_tiles, reference_image.shape[0])
+        expected_num_tiles = reference_ar[0] * reference_ar[1]
+        self.assertEqual(expected_num_tiles, reference_image.shape[0])
 
         # Pre-work for eager and exported models. The reference model performs these
         # calculations and passes the result to _CLIPImageTransform, the exportable model.
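A minimal sketch of the export-and-run flow this patch leaves behind, for reviewers who want to smoke-test it locally. It is not part of the patch; it uses only the helpers the patch itself imports in test_preprocess.py, and it assumes the program manager's .buffer attribute and the pybindings module's forward() behave as they do in that test:

# Sketch only: export the preprocess model (pad_max_tiles is gone, so the
# number of output tiles now always follows the computed aspect ratio),
# lower it to ExecuTorch, then load and run it via the pybindings.
from executorch.examples.models.llama3_2_vision.preprocess.export_preprocess_lib import (
    export_preprocess,
    get_example_inputs,
    lower_to_executorch_preprocess,
)
from executorch.extension.pybindings.portable_lib import (
    _load_for_executorch_from_buffer,
)

ep = export_preprocess()                       # ExportedProgram, default config
et = lower_to_executorch_preprocess(ep)        # ExecuTorch program manager

# Load the lowered program from its in-memory buffer (the same loader the
# test uses) and run it on the example inputs used at export time.
module = _load_for_executorch_from_buffer(et.buffer)
outputs = module.forward(get_example_inputs())

Note the behavioral consequence exercised by the updated test: without padding to max_num_tiles, the tile count of the output equals expected_ar[0] * expected_ar[1] rather than always being max_num_tiles.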