From 4050fd9d042ae72286e57b226f679017dd50f1fa Mon Sep 17 00:00:00 2001
From: tkaruturi
Date: Fri, 27 Sep 2024 00:00:35 -0700
Subject: [PATCH 1/2] Add sort util for 1D tensors

Differential Revision: D55577025
---
 kernels/portable/cpu/util/sort_util.cpp       | 73 +++++++++++++++++++
 kernels/portable/cpu/util/sort_util.h         | 25 +++++++
 kernels/portable/cpu/util/targets.bzl         | 11 +++
 .../portable/cpu/util/test/sort_util_test.cpp | 45 ++++++++++++
 kernels/portable/cpu/util/test/targets.bzl    | 10 +++
 5 files changed, 164 insertions(+)
 create mode 100644 kernels/portable/cpu/util/sort_util.cpp
 create mode 100644 kernels/portable/cpu/util/sort_util.h
 create mode 100644 kernels/portable/cpu/util/test/sort_util_test.cpp

diff --git a/kernels/portable/cpu/util/sort_util.cpp b/kernels/portable/cpu/util/sort_util.cpp
new file mode 100644
index 00000000000..c57053a5088
--- /dev/null
+++ b/kernels/portable/cpu/util/sort_util.cpp
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include "executorch/kernels/portable/cpu/util/sort_util.h"
+#include <executorch/runtime/kernel/kernel_includes.h>
+#include <algorithm>
+
+namespace torch {
+namespace executor {
+
+using Tensor = exec_aten::Tensor;
+
+Error sort_tensor(
+    const Tensor& tensor,
+    Tensor& sorted_tensor,
+    Tensor& sorted_indices,
+    bool descending) {
+  // Check that the input tensor is valid
+  ET_CHECK_MSG(tensor.dim() == 1, "Input tensor must be 1D");
+
+  // Check that the output tensors are valid
+  ET_CHECK_MSG(sorted_tensor.dim() == 1, "Output tensor must be 1D");
+  ET_CHECK_MSG(sorted_indices.dim() == 1, "Output tensor must be 1D");
+
+  // Check that the tensors have the expected dtypes
+  ET_CHECK_MSG(
+      tensor.scalar_type() == sorted_tensor.scalar_type(),
+      "Input and output tensors must have the same dtype");
+  ET_CHECK_MSG(
+      tensor.scalar_type() == ScalarType::Float,
+      "Only float inputs are supported currently");
+  ET_CHECK_MSG(
+      sorted_indices.scalar_type() == exec_aten::ScalarType::Long,
+      "Output indices tensor must be of type int64");
+
+  // Get the number of elements in the tensor
+  int size = tensor.numel();
+
+  // Initialize the indices tensor with the identity permutation
+  for (int i = 0; i < size; i++) {
+    sorted_indices.mutable_data_ptr<int64_t>()[i] = i;
+  }
+
+  // Sort the indices based on the corresponding tensor values
+  std::sort(
+      sorted_indices.mutable_data_ptr<int64_t>(),
+      sorted_indices.mutable_data_ptr<int64_t>() + size,
+      [&tensor, descending](int64_t i, int64_t j) {
+        if (descending) {
+          return tensor.const_data_ptr<float>()[i] >
+              tensor.const_data_ptr<float>()[j];
+        } else {
+          return tensor.const_data_ptr<float>()[i] <
+              tensor.const_data_ptr<float>()[j];
+        }
+      });
+
+  // Rearrange the tensor values based on the sorted indices
+  for (int i = 0; i < size; i++) {
+    sorted_tensor.mutable_data_ptr<float>()[i] = tensor.const_data_ptr<
+        float>()[sorted_indices.const_data_ptr<int64_t>()[i]];
+  }
+
+  return Error::Ok;
+}
+
+} // namespace executor
+} // namespace torch
diff --git a/kernels/portable/cpu/util/sort_util.h b/kernels/portable/cpu/util/sort_util.h
new file mode 100644
index 00000000000..9095490b327
--- /dev/null
+++ b/kernels/portable/cpu/util/sort_util.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#include <executorch/runtime/kernel/kernel_includes.h>
+
+namespace torch {
+namespace executor {
+
+using Tensor = exec_aten::Tensor;
+
+Error sort_tensor(
+    const Tensor& tensor,
+    Tensor& sorted_tensor,
+    Tensor& sorted_indices,
+    bool descending = false);
+
+} // namespace executor
+} // namespace torch
diff --git a/kernels/portable/cpu/util/targets.bzl b/kernels/portable/cpu/util/targets.bzl
index 82d3d84fa23..e271ab06bff 100644
--- a/kernels/portable/cpu/util/targets.bzl
+++ b/kernels/portable/cpu/util/targets.bzl
@@ -237,6 +237,17 @@ def define_common_targets():
         visibility = ["//executorch/kernels/portable/cpu/..."],
     )
 
+    runtime.cxx_library(
+        name = "sort_util",
+        srcs = ["sort_util.cpp"],
+        exported_headers = ["sort_util.h"],
+        deps = [
+            "//executorch/runtime/kernel:kernel_includes",
+            "//executorch/runtime/core/exec_aten/util:tensor_util",
+        ],
+        visibility = ["//executorch/kernels/portable/cpu/...", "//executorch/kernels/torchvision/..."],
+    )
+
     # Utility functions that can be used by operators that perform reduction
     for aten_mode in [True, False]:
         suffix = "_aten" if aten_mode else ""
diff --git a/kernels/portable/cpu/util/test/sort_util_test.cpp b/kernels/portable/cpu/util/test/sort_util_test.cpp
new file mode 100644
index 00000000000..e5dbfbd4b30
--- /dev/null
+++ b/kernels/portable/cpu/util/test/sort_util_test.cpp
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <executorch/kernels/portable/cpu/util/sort_util.h>
+#include <executorch/runtime/core/exec_aten/exec_aten.h>
+#include <executorch/runtime/core/exec_aten/testing_util/tensor_factory.h>
+#include <executorch/runtime/core/exec_aten/testing_util/tensor_util.h>
+#include <executorch/runtime/core/exec_aten/util/scalar_type_util.h>
+
+#include <gtest/gtest.h>
+
+using namespace ::testing;
+using exec_aten::ScalarType;
+using exec_aten::Tensor;
+using torch::executor::ArrayRef;
+using torch::executor::testing::TensorFactory;
+
+TEST(SortUtilTest, SortTensorTest) {
+  TensorFactory<ScalarType::Float> tf;
+  TensorFactory<ScalarType::Long> lf;
+
+  Tensor a = tf.make({4}, {3, 2, 1, 4});
+  Tensor b = tf.zeros({4});
+  Tensor c = lf.zeros({4});
+
+  // Ascending order sort test
+  sort_tensor(a, b, c);
+
+  Tensor expected = tf.make({4}, {1, 2, 3, 4});
+  Tensor expected_indices = lf.make({4}, {2, 1, 0, 3});
+  EXPECT_TENSOR_EQ(b, expected);
+  EXPECT_TENSOR_EQ(c, expected_indices);
+
+  // Descending order sort test
+  sort_tensor(a, b, c, true);
+  expected = tf.make({4}, {4, 3, 2, 1});
+  expected_indices = lf.make({4}, {3, 0, 1, 2});
+  EXPECT_TENSOR_EQ(b, expected);
+  EXPECT_TENSOR_EQ(c, expected_indices);
+}
diff --git a/kernels/portable/cpu/util/test/targets.bzl b/kernels/portable/cpu/util/test/targets.bzl
index 28988b90dcc..23a6a7bfe01 100644
--- a/kernels/portable/cpu/util/test/targets.bzl
+++ b/kernels/portable/cpu/util/test/targets.bzl
@@ -21,3 +21,13 @@ def define_common_targets():
             "//executorch/kernels/portable/cpu/util:reduce_util",
         ],
     )
+
+    runtime.cxx_test(
+        name = "sort_util_test",
+        srcs = ["sort_util_test.cpp"],
+        deps = [
+            "//executorch/runtime/core/exec_aten:lib",
+            "//executorch/runtime/core/exec_aten/testing_util:tensor_util",
+            "//executorch/kernels/portable/cpu/util:sort_util",
+        ],
+    )

From af92d4e9cfeec7bfc7125a0069e675284031d121 Mon Sep 17 00:00:00 2001
From: Tarun Karuturi
Date: Mon, 30 Sep 2024 14:57:10 -0700
Subject: [PATCH 2/2] Make make_tensor in broadcast utilities public and
 rename free_broadcast_tensor (#2785)

Summary:
Pull Request resolved: https://github.com/pytorch/executorch/pull/2785

This diff does a couple of things:
- Makes `make_tensor` a public function, renamed to `allocate_tensor`, so
  that we can create temporary intermediate tensors in operators that need
  to do so.
  (Such as NMS that is implemented above in this stack)
- Renames `free_broadcast_tensor` to the more generic name `free_tensor`

Differential Revision: D55577026
---
 kernels/portable/cpu/util/broadcast_util.cpp  | 20 +++++------
 kernels/portable/cpu/util/broadcast_util.h    | 33 +++++++++++++++----
 .../portable/cpu/util/test/broadcast_test.cpp |  8 ++---
 3 files changed, 38 insertions(+), 23 deletions(-)

diff --git a/kernels/portable/cpu/util/broadcast_util.cpp b/kernels/portable/cpu/util/broadcast_util.cpp
index 943219490b0..0ebe78d5d56 100644
--- a/kernels/portable/cpu/util/broadcast_util.cpp
+++ b/kernels/portable/cpu/util/broadcast_util.cpp
@@ -18,17 +18,15 @@ namespace executor {
 using Tensor = exec_aten::Tensor;
 using ScalarType = exec_aten::ScalarType;
 
-void free_broadcast_tensor(const Tensor& broadcast_tensor) {
-  free((void*)broadcast_tensor.const_data_ptr());
-  free((void*)broadcast_tensor.sizes().data());
-  free((void*)broadcast_tensor.dim_order().data());
-  free((void*)broadcast_tensor.strides().data());
-  free(broadcast_tensor.unsafeGetTensorImpl());
+void free_tensor(const Tensor& tensor) {
+  free((void*)tensor.const_data_ptr());
+  free((void*)tensor.sizes().data());
+  free((void*)tensor.dim_order().data());
+  free((void*)tensor.strides().data());
+  free(tensor.unsafeGetTensorImpl());
 }
 
-namespace {
-
-Tensor make_tensor(
+Tensor allocate_tensor(
     const ArrayRef<Tensor::SizesType>& sizes,
     const ArrayRef<Tensor::DimOrderType>& dim_order,
     const ArrayRef<Tensor::StridesType>& strides,
     const ScalarType& dtype) {
@@ -73,8 +71,6 @@ Tensor make_tensor(
   return Tensor{tensor_impl};
 }
 
-} // namespace
-
 bool tensor_is_broadcastable_to(
     const exec_aten::ArrayRef<Tensor::SizesType> broadcast_from_shape,
     const exec_aten::ArrayRef<Tensor::SizesType> broadcast_to_shape) {
@@ -171,7 +167,7 @@ Tensor broadcast_tensor(
 
   // Once we have discovered that broadcast_from can be broadcasted into
   // broadcast_to, use repeat() to do the broadcast.
-  Tensor out = make_tensor(
+  Tensor out = allocate_tensor(
       broadcast_to_shape,
       broadcast_to_dim_order,
       broadcast_to_strides,
diff --git a/kernels/portable/cpu/util/broadcast_util.h b/kernels/portable/cpu/util/broadcast_util.h
index 92d35f322fb..a563ac36c41 100644
--- a/kernels/portable/cpu/util/broadcast_util.h
+++ b/kernels/portable/cpu/util/broadcast_util.h
@@ -62,6 +62,23 @@ bool tensors_are_broadcastable_between(
  */
 bool tensors_are_broadcastable_between(const Tensor& a, const Tensor& b);
 
+/**
+ * Create a new tensor with the given sizes, dim_order, and strides. Memory
+ * is dynamically allocated within this function and the tensor must be freed
+ * only using free_tensor.
+ *
+ * @param[in] sizes The sizes of the tensor.
+ * @param[in] dim_order The dim order of the tensor.
+ * @param[in] strides The strides of the tensor.
+ * @param[in] dtype The data type of the tensor.
+ * @returns A new tensor with the given sizes, dim_order, and strides.
+ */
+Tensor allocate_tensor(
+    const ArrayRef<Tensor::SizesType>& sizes,
+    const ArrayRef<Tensor::DimOrderType>& dim_order,
+    const ArrayRef<Tensor::StridesType>& strides,
+    const ScalarType& dtype);
+
 /**
  * DEPRECATED: Use `delinearize_index()` and `linearize_access_indexes()` for
  * index remapping to avoid memory allocation.
@@ -75,7 +92,7 @@ bool tensors_are_broadcastable_between(const Tensor& a, const Tensor& b);
  * @param[in] broadcast_to The tensor to which we want to broadcast to.
  * @returns A new tensor with the same shape as broadcast_to and the data
  * repeated as appropriate. This tensor contains dynamically allocated memory
- * and must be freed using free_broadcast_tensor.
+ * and must be freed using free_tensor.
  */
 ET_DEPRECATED exec_aten::Tensor broadcast_tensor(
     const exec_aten::Tensor& broadcast_from,
@@ -192,19 +209,21 @@ ET_NODISCARD inline Error resize_to_broadcast_target_size(
 }
 
 /**
- * DEPRECATED: Use `delinearize_index()` and `linearize_access_indexes()` for
- * index remapping to avoid memory allocation.
- *
- * Free the dynamically allocated memory in broadcast_tensor. This should only
- * be used on a tensor returned by broadcast_tensor.
  *
  * @param[in] The tensor that was previously returned by a call to
- * broadcast_tensor.
+ * allocate_tensor.
  * @returns void
  */
 ET_DEPRECATED void free_broadcast_tensor(
     const exec_aten::Tensor& broadcast_tensor);
 
+/**
+ * Free the dynamically allocated memory in allocate_tensor. This should only
+ * be used on a tensor returned by allocate_tensor.
+ *
+ */
+void free_tensor(const exec_aten::Tensor& allocated_tensor);
+
 /**
  * Delinearize a flattened index to per-dimension indexes.
  *
diff --git a/kernels/portable/cpu/util/test/broadcast_test.cpp b/kernels/portable/cpu/util/test/broadcast_test.cpp
index d87e8ecec85..87ea8714236 100644
--- a/kernels/portable/cpu/util/test/broadcast_test.cpp
+++ b/kernels/portable/cpu/util/test/broadcast_test.cpp
@@ -38,11 +38,11 @@ TEST(BroadcastUtilTest, BroadcastTensor) {
 
   Tensor d = torch::executor::broadcast_tensor(a, c);
   EXPECT_TENSOR_DATA_EQ(d, tf.make({2, 2}, {2, 2, 2, 2}));
-  torch::executor::free_broadcast_tensor(d);
+  torch::executor::free_tensor(d);
 
   d = torch::executor::broadcast_tensor(b, c);
   EXPECT_TENSOR_DATA_EQ(d, tf.make({2, 2}, {2, 2, 2, 2}));
-  torch::executor::free_broadcast_tensor(d);
+  torch::executor::free_tensor(d);
 }
 
 TEST(BroadcastUtilTest, BroadcastableBetween) {
@@ -69,12 +69,12 @@ TEST(BroadcastUtilTest, BroadcastableToFrom) {
   ASSERT_TRUE(tensor_is_broadcastable_to(a, c));
   Tensor d = torch::executor::broadcast_tensor(a, c);
   EXPECT_TENSOR_DATA_EQ(d, tf.make({2, 2}, {2, 2, 2, 2}));
-  torch::executor::free_broadcast_tensor(d);
+  torch::executor::free_tensor(d);
 
   ASSERT_TRUE(tensor_is_broadcastable_to(b, c));
   d = torch::executor::broadcast_tensor(b, c);
   EXPECT_TENSOR_DATA_EQ(d, tf.make({2, 2}, {2, 2, 2, 2}));
-  torch::executor::free_broadcast_tensor(d);
+  torch::executor::free_tensor(d);
 }
 
 TEST(BroadcastUtilTest, NotBroadcastableTo) {
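
Usage sketch (reviewer aid, not part of either patch): the snippet below shows how the two new APIs compose, with `sort_tensor` from patch 1 and the newly public `allocate_tensor` and `free_tensor` from patch 2. The includes and the `TensorFactory` helper mirror the test files above; the function name and the score values are hypothetical.

// Hypothetical usage sketch; assumes both patches are applied.
#include <executorch/kernels/portable/cpu/util/broadcast_util.h>
#include <executorch/kernels/portable/cpu/util/sort_util.h>
#include <executorch/runtime/core/exec_aten/testing_util/tensor_factory.h>

using exec_aten::ScalarType;
using exec_aten::Tensor;
using torch::executor::ArrayRef;
using torch::executor::testing::TensorFactory;

void rank_scores_example() {
  TensorFactory<ScalarType::Float> tf;
  TensorFactory<ScalarType::Long> lf;

  // Rank four confidence scores, highest first (descending = true).
  Tensor scores = tf.make({4}, {0.3, 0.9, 0.1, 0.7});
  Tensor sorted_scores = tf.zeros({4});
  Tensor order = lf.zeros({4});
  torch::executor::sort_tensor(scores, sorted_scores, order, /*descending=*/true);
  // sorted_scores == {0.9, 0.7, 0.3, 0.1}, order == {1, 3, 0, 2}

  // Create a temporary 1-D float scratch tensor with the newly public
  // allocate_tensor; it owns heap memory and must be released via free_tensor.
  Tensor::SizesType sizes[] = {4};
  Tensor::DimOrderType dim_order[] = {0};
  Tensor::StridesType strides[] = {1};
  Tensor scratch = torch::executor::allocate_tensor(
      ArrayRef<Tensor::SizesType>(sizes, 1),
      ArrayRef<Tensor::DimOrderType>(dim_order, 1),
      ArrayRef<Tensor::StridesType>(strides, 1),
      ScalarType::Float);
  // ... use scratch as an intermediate buffer ...
  torch::executor::free_tensor(scratch);
}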