From f2d0cc8ef7070b779cbb339ad73f62d67909fbd0 Mon Sep 17 00:00:00 2001 From: Lucy Qiu Date: Thu, 29 Aug 2024 22:53:20 -0700 Subject: [PATCH] Preprocess C++ (#4987) Summary: Preprocess C++ runner calculations. Mirror torchtune's helper functions in C++, and tests - find_supported_resolutions - get_canvas_best_fit - get_inscribed_size Functions: https://github.com/pytorch/torchtune/tree/main/torchtune/modules/transforms/vision_utils Reviewed By: mergennachin Differential Revision: D61833480 --- .../models/flamingo/preprocess/preprocess.cpp | 118 ++++++++++++++++++ .../models/flamingo/preprocess/preprocess.h | 41 ++++++ .../flamingo/preprocess/preprocess_test.cpp | 113 +++++++++++++++++ .../models/flamingo/preprocess/targets.bzl | 20 +++ 4 files changed, 292 insertions(+) create mode 100644 examples/models/flamingo/preprocess/preprocess.cpp create mode 100644 examples/models/flamingo/preprocess/preprocess.h create mode 100644 examples/models/flamingo/preprocess/preprocess_test.cpp create mode 100644 examples/models/flamingo/preprocess/targets.bzl diff --git a/examples/models/flamingo/preprocess/preprocess.cpp b/examples/models/flamingo/preprocess/preprocess.cpp new file mode 100644 index 00000000000..ff46070f669 --- /dev/null +++ b/examples/models/flamingo/preprocess/preprocess.cpp @@ -0,0 +1,118 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
// C++ implementation of the python functions in torchtune:
// https://github.com/pytorch/torchtune/tree/main/torchtune/modules/transforms/vision_utils

// Own header. Guarded with __has_include (C++17) so that this translation
// unit still compiles when it is built outside the executorch source tree.
#if __has_include("executorch/examples/models/flamingo/preprocess/preprocess.h")
#include "executorch/examples/models/flamingo/preprocess/preprocess.h"
#endif

#include <algorithm>
#include <cassert>
#include <vector>

// Returns every positive divisor of `n` in ascending order. The result is
// empty when `n` is smaller than 1.
std::vector<int> _get_factors(int n) {
  std::vector<int> factors;
  for (int candidate = 1; candidate <= n; ++candidate) {
    if (n % candidate == 0) {
      factors.push_back(candidate);
    }
  }
  return factors;
}

// Enumerates every (height, width) canvas made of exactly k square tiles of
// side `tile_size`, for k = max_num_tiles down to 1. Within one k the canvases
// are ordered by ascending height. Returns an empty vector when
// `max_num_tiles` is smaller than 1.
std::vector<std::vector<int>> find_supported_resolutions(
    int max_num_tiles,
    int tile_size) {
  std::vector<std::vector<int>> supported_resolutions;
  for (int num_tiles = max_num_tiles; num_tiles > 0; --num_tiles) {
    // Each divisor of the tile count is a possible number of tile rows; the
    // matching number of tile columns is the cofactor.
    for (const int tiles_high : _get_factors(num_tiles)) {
      const int tiles_wide = num_tiles / tiles_high;
      supported_resolutions.push_back(
          {tiles_high * tile_size, tiles_wide * tile_size});
    }
  }
  return supported_resolutions;
}

// Picks, from `possible_resolutions`, the canvas that an image of
// `image_size` (height, width) should be resized to without distortion.
//
// The scale of a candidate is that of its limiting side:
// min(canvas_h / image_h, canvas_w / image_w). Selection rules:
//   * If at least one candidate has scale >= 1 (upscaling), the largest such
//     scale wins when `resize_to_max_canvas` is true, otherwise the smallest.
//   * If no candidate upscales, the largest scale wins, i.e. the least
//     downscaling.
//   * Among upscaling candidates with the same scale, the one with the
//     smallest area wins, to reduce padding. Ties between downscaling
//     candidates keep the first one encountered.
//
// Returns the chosen (height, width), or an empty vector when
// `possible_resolutions` is empty. Both image dimensions are used as divisors
// and are therefore expected to be positive.
std::vector<int> get_canvas_best_fit(
    std::vector<int> image_size,
    std::vector<std::vector<int>> possible_resolutions,
    bool resize_to_max_canvas) {
  assert(image_size.size() == 2);
  const int image_h = image_size[0];
  const int image_w = image_size[1];

  // Negative sentinel: any real scale compares greater than it.
  float best_scale = -0.1f;
  std::vector<int> best_resolution;
  int best_area = 0;

  for (const auto& resolution : possible_resolutions) {
    assert(resolution.size() == 2);
    const float scale_h = resolution[0] / static_cast<float>(image_h);
    const float scale_w = resolution[1] / static_cast<float>(image_w);

    // Get limiting side scaling -> no distortion
    const float scale = scale_h < scale_w ? scale_h : scale_w;

    bool is_candidate = false;
    if (scale >= 1.0f) {
      // Upscaling options.
      if (resize_to_max_canvas) {
        is_candidate = scale >= best_scale;
      } else {
        // Smallest upscaling wins. `best_scale < 1` means the current best is
        // either the sentinel or a downscaling option; any upscaling option
        // must replace it, whatever order the candidates arrive in.
        is_candidate = (scale <= best_scale) || (best_scale < 1.0f);
      }
    } else {
      // If no upscaling options, find the minimum downscaling (max scale for
      // scales < 1). Once an upscaling option is the current best,
      // best_scale >= 1 and this can no longer be true.
      is_candidate = (scale >= best_scale) || best_resolution.empty();
    }

    if (!is_candidate) {
      continue;
    }

    // Select the best resolution.
    // @lint-ignore CLANGTIDY facebook-hte-ParameterUncheckedArrayBounds
    const int area = resolution[0] * resolution[1];
    if (scale == best_scale) {
      // If there are multiple resolutions, get the one with minimum area to
      // reduce padding.
      if (scale >= 1.0f && area < best_area) {
        best_resolution = resolution;
        best_area = area;
      }
    } else {
      best_resolution = resolution;
      best_scale = scale;
      best_area = area;
    }
  }
  return best_resolution;
}

// Computes the (height, width) an image of `image_size` has after being
// resized, keeping its aspect ratio, so that it fits inside `target_size`.
//
// When `max_size` is positive, each target side is first limited to
// min(max(image side, max_size), target side). Pass a non-positive value
// (e.g. -1) to skip that limit. Fractional results are truncated toward zero.
// Both image dimensions are used as divisors and are therefore expected to be
// positive.
std::vector<int> get_inscribed_size(
    std::vector<int> image_size,
    std::vector<int> target_size,
    int max_size) {
  assert(image_size.size() == 2);
  assert(target_size.size() == 2);

  const int image_height = image_size[0];
  const int image_width = image_size[1];

  int target_height = target_size[0];
  int target_width = target_size[1];

  if (max_size > 0) {
    target_height = std::min(std::max(image_height, max_size), target_size[0]);
    target_width = std::min(std::max(image_width, max_size), target_size[1]);
  }

  // Scale each side by the ratio of the *other* side, then clamp to the
  // target: the limiting side lands on its target and the other stays within.
  const int resize_height = std::min(
      static_cast<int>(
          image_height * (target_width / static_cast<float>(image_width))),
      target_height);
  const int resize_width = std::min(
      static_cast<int>(
          image_width * (target_height / static_cast<float>(image_height))),
      target_width);

  return {resize_height, resize_width};
}
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#pragma once

#include <vector>

// C++ implementation of the python functions in torchtune:
// https://github.com/pytorch/torchtune/tree/main/torchtune/modules/transforms/vision_utils

// Calculates all factors of a given number, in ascending order.
std::vector<int> _get_factors(int n);

// Computes all combinations of resolutions, multiples of tile_size, that
// contain up to max_num_tiles. Useful when dividing an image into tiles.
// For example, if we want at most 2 tiles per image, then we can support the
// following resolutions: (1x1, 1x2, 2x1) * tile_size.
// Returns a vector of (height, width) pairs.
std::vector<std::vector<int>> find_supported_resolutions(
    int max_num_tiles,
    int tile_size);

// Determines the best canvas possible from a list of possible resolutions to
// resize an image to, without distortion.
// image_size and every entry of possible_resolutions are (height, width).
// Returns the chosen (height, width) entry of possible_resolutions.
std::vector<int> get_canvas_best_fit(
    std::vector<int> image_size,
    std::vector<std::vector<int>> possible_resolutions,
    bool resize_to_max_canvas);

// Calculates the size of an image, if it was resized to be inscribed within
// the target_size. It is upscaled or downscaled such that one side is equal to
// the target_size, and the second side is less than or equal to the
// target_size. A positive max_size additionally limits each target side to
// min(max(image side, max_size), target side); a non-positive max_size
// disables that limit.
// image_size and target_size are (height, width); so is the return value.
std::vector<int> get_inscribed_size(
    std::vector<int> image_size,
    std::vector<int> target_size,
    int max_size);
+ */ + +#include +#include + +using namespace ::testing; + +// Mirror the torchtune python testing: +// https://github.com/pytorch/torchtune/tree/main/tests/torchtune/modules/transforms + +void test_find_supported_resolutions( + int max_num_tiles, + int tile_size, + std::vector> expected_resolutions) { + std::vector> resolutions = + find_supported_resolutions(max_num_tiles, tile_size); + + EXPECT_EQ(resolutions.size(), expected_resolutions.size()); + + for (int i = 0; i < resolutions.size(); i++) { + EXPECT_EQ(resolutions[i].size(), expected_resolutions[i].size()); + EXPECT_EQ(resolutions[i][0], expected_resolutions[i][0]); // height + EXPECT_EQ(resolutions[i][1], expected_resolutions[i][1]); // width + } +} + +TEST(PreprocessTest, TestFindSupportedResolution) { + test_find_supported_resolutions(1, 224, {{224, 224}}); + test_find_supported_resolutions(2, 100, {{100, 200}, {200, 100}, {100, 100}}); + test_find_supported_resolutions( + 3, 50, {{50, 150}, {150, 50}, {50, 100}, {100, 50}, {50, 50}}); + test_find_supported_resolutions( + 4, + 300, + { + {300, 1200}, + {600, 600}, + {1200, 300}, + {300, 900}, + {900, 300}, + {300, 600}, + {600, 300}, + {300, 300}, + }); +} + +void test_get_canvas_best_fit( + std::vector image_size, + std::vector> possible_resolutions, + bool resize_to_max_canvas, + std::vector expected_best_resolution) { + std::vector best_resolution = get_canvas_best_fit( + image_size, possible_resolutions, resize_to_max_canvas); + EXPECT_EQ(best_resolution[0], expected_best_resolution[0]); // height + EXPECT_EQ(best_resolution[1], expected_best_resolution[1]); // width +} + +TEST(PreprocessTest, TestGetCanvasBestFit_200x300_F) { + std::vector> possible_resolutions = { + {224, 896}, + {448, 448}, + {224, 224}, + {896, 224}, + {224, 672}, + {672, 224}, + {224, 448}, + {448, 224}, + }; + test_get_canvas_best_fit( + {200, 300}, + possible_resolutions, + false, // resize_to_max_canvas + {224, 448}); + + test_get_canvas_best_fit( + {200, 500}, + 
possible_resolutions, + true, // resize_to_max_canvas + {224, 672}); + test_get_canvas_best_fit( + {200, 200}, + possible_resolutions, + false, // resize_to_max_canvas + {224, 224}); + test_get_canvas_best_fit( + {200, 100}, + possible_resolutions, + true, // resize_to_max_canvas + {448, 224}); +} + +void test_get_inscribed_size( + std::vector image_size, + std::vector target_size, + int max_size, + std::vector expected_target_size) { + std::vector result = + get_inscribed_size(image_size, target_size, max_size); + EXPECT_EQ(result[0], expected_target_size[0]); // height + EXPECT_EQ(result[1], expected_target_size[1]); // width +} +TEST(PreprocessTest, GetInscribedSize) { + test_get_inscribed_size({200, 100}, {1000, 1200}, 600, {600, 300}); + test_get_inscribed_size({2000, 200}, {1000, 1200}, 2000, {1000, 100}); + test_get_inscribed_size({400, 200}, {1000, 1200}, -1, {1000, 500}); + test_get_inscribed_size({1000, 500}, {400, 300}, -1, {400, 200}); +} diff --git a/examples/models/flamingo/preprocess/targets.bzl b/examples/models/flamingo/preprocess/targets.bzl new file mode 100644 index 00000000000..fd60d94a907 --- /dev/null +++ b/examples/models/flamingo/preprocess/targets.bzl @@ -0,0 +1,20 @@ +load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime") + +def define_common_targets(): + """Defines targets that should be shared between fbcode and xplat. + + The directory containing this targets.bzl file should also contain both + TARGETS and BUCK files that call this function. + """ + + runtime.cxx_library( + name = "preprocess", + srcs = ["preprocess.cpp"], + exported_headers = ["preprocess.h"], + ) + + runtime.cxx_test( + name = "preprocess_test", + srcs = ["preprocess_test.cpp"], + deps = [":preprocess"], + )