From 4e18b4b840e3b1ff44f14b9aee90544d65bf59d4 Mon Sep 17 00:00:00 2001 From: tkaruturi Date: Wed, 9 Oct 2024 16:15:14 -0700 Subject: [PATCH 1/2] Add allocate tensor util that uses temp allocator Differential Revision: D64072692 --- .../cpu/util/allocate_tensor_util.cpp | 74 +++++++++++++++++++ .../portable/cpu/util/allocate_tensor_util.h | 18 +++++ kernels/portable/cpu/util/targets.bzl | 10 +++ .../cpu/util/test/allocate_tensor_test.cpp | 68 +++++++++++++++++ kernels/portable/cpu/util/test/targets.bzl | 10 +++ 5 files changed, 180 insertions(+) create mode 100644 kernels/portable/cpu/util/allocate_tensor_util.cpp create mode 100644 kernels/portable/cpu/util/allocate_tensor_util.h create mode 100644 kernels/portable/cpu/util/test/allocate_tensor_test.cpp diff --git a/kernels/portable/cpu/util/allocate_tensor_util.cpp b/kernels/portable/cpu/util/allocate_tensor_util.cpp new file mode 100644 index 00000000000..0bb10b6caff --- /dev/null +++ b/kernels/portable/cpu/util/allocate_tensor_util.cpp @@ -0,0 +1,74 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include "executorch/kernels/portable/cpu/util/allocate_tensor_util.h" + + +namespace torch { +namespace executor { + +using Tensor = exec_aten::Tensor; +using ScalarType = exec_aten::ScalarType; + +Tensor allocate_tensor( + KernelRuntimeContext& ctx, + const ArrayRef& sizes, + const ArrayRef& dim_order, + const ArrayRef& strides, + const ScalarType& dtype) { + int dim = sizes.size(); + int size_nbytes = dim * sizeof(Tensor::SizesType); + Result temp_mem_res_size = ctx.allocate_temp(size_nbytes); + void* size_data_ptr = + temp_mem_res_size.ok() ? 
temp_mem_res_size.get() : nullptr;
+  ET_CHECK_MSG(size_data_ptr != nullptr, "Failed to malloc for size bytes");
+  memcpy(size_data_ptr, sizes.data(), size_nbytes);
+
+  // TODO(T145322324): can we remove the static cast once size is unsigned?
+  size_t dim_order_nbytes =
+      static_cast<size_t>(dim) * sizeof(Tensor::DimOrderType);
+  Result<void*> temp_mem_res_dim_order = ctx.allocate_temp(dim_order_nbytes);
+  void* dim_order_data_ptr =
+      temp_mem_res_dim_order.ok() ? temp_mem_res_dim_order.get() : nullptr;
+  ET_CHECK_MSG(
+      dim_order_data_ptr != nullptr, "Failed to malloc for dim order bytes");
+  memcpy(dim_order_data_ptr, dim_order.data(), dim_order_nbytes);
+
+  int strides_nbytes = dim * sizeof(Tensor::StridesType);
+  Result<void*> temp_mem_res_strides = ctx.allocate_temp(strides_nbytes);
+  void* strides_data_ptr =
+      temp_mem_res_strides.ok() ? temp_mem_res_strides.get() : nullptr;
+  // NOTE(review): stray debug printf/fflush removed; two comment lines kept
+  // in their place so the hunk's "+1,74" line count still applies cleanly.
+  ET_CHECK_MSG(
+      strides_data_ptr != nullptr, "Failed to malloc for strides bytes");
+  memcpy(strides_data_ptr, strides.data(), strides_nbytes);
+
+  Result<void*> temp_mem_res_tensor = ctx.allocate_temp(sizeof(TensorImpl));
+  auto tensor_impl = static_cast<TensorImpl*>(
+      temp_mem_res_tensor.ok() ? temp_mem_res_tensor.get() : nullptr);
+  ET_CHECK_MSG(tensor_impl != nullptr, "Failed to malloc for data TensorImpl");
+
+  new (tensor_impl) TensorImpl(
+      dtype,
+      dim,
+      reinterpret_cast<Tensor::SizesType*>(size_data_ptr),
+      nullptr,
+      reinterpret_cast<Tensor::DimOrderType*>(dim_order_data_ptr),
+      reinterpret_cast<Tensor::StridesType*>(strides_data_ptr));
+
+  Result<void*> temp_mem_res_data = ctx.allocate_temp(tensor_impl->nbytes());
+  void* data_ptr = temp_mem_res_data.ok() ? 
temp_mem_res_data.get() : nullptr; + ET_CHECK_MSG(data_ptr != nullptr, "Failed to malloc for data buffer"); + tensor_impl->set_data(data_ptr); + + return Tensor{tensor_impl}; +} + +} // namespace executor +} // namespace torch diff --git a/kernels/portable/cpu/util/allocate_tensor_util.h b/kernels/portable/cpu/util/allocate_tensor_util.h new file mode 100644 index 00000000000..cd9b10e0444 --- /dev/null +++ b/kernels/portable/cpu/util/allocate_tensor_util.h @@ -0,0 +1,18 @@ +// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. + +#pragma once + +#include + +namespace torch { +namespace executor { + +Tensor allocate_tensor( + KernelRuntimeContext& ctx, + const ArrayRef& sizes, + const ArrayRef& dim_order, + const ArrayRef& strides, + const ScalarType& dtype); + +} // namespace executor +} // namespace torch diff --git a/kernels/portable/cpu/util/targets.bzl b/kernels/portable/cpu/util/targets.bzl index 82d3d84fa23..3ee3ceff6dc 100644 --- a/kernels/portable/cpu/util/targets.bzl +++ b/kernels/portable/cpu/util/targets.bzl @@ -237,6 +237,16 @@ def define_common_targets(): visibility = ["//executorch/kernels/portable/cpu/..."], ) + runtime.cxx_library( + name = "allocate_tensor_util", + srcs = ["allocate_tensor_util.cpp"], + exported_headers = ["allocate_tensor_util.h"], + deps = [ + "//executorch/runtime/kernel:kernel_includes", + ], + visibility = ["//executorch/kernels/portable/cpu/..."], + ) + # Utility functions that can be used by operators that perform reduction for aten_mode in [True, False]: suffix = "_aten" if aten_mode else "" diff --git a/kernels/portable/cpu/util/test/allocate_tensor_test.cpp b/kernels/portable/cpu/util/test/allocate_tensor_test.cpp new file mode 100644 index 00000000000..dcfea3687a6 --- /dev/null +++ b/kernels/portable/cpu/util/test/allocate_tensor_test.cpp @@ -0,0 +1,68 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. 
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <executorch/kernels/portable/cpu/util/allocate_tensor_util.h>
+
+#include <executorch/runtime/core/exec_aten/exec_aten.h>
+#include <executorch/runtime/kernel/kernel_runtime_context.h>
+#include <executorch/runtime/platform/runtime.h>
+#include <gtest/gtest.h>
+using ScalarType = exec_aten::ScalarType;
+
+class AllocateTest : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    // Since these tests cause ET_LOG to be called, the PAL must be initialized
+    // first.
+    torch::executor::runtime_init();
+  }
+};
+
+TEST_F(AllocateTest, AllocateTensor) {
+  uint8_t* temp_allocator_ptr = (uint8_t*)malloc(2048);
+  executorch::runtime::MemoryAllocator temp_allocator(2048, temp_allocator_ptr);
+  executorch::runtime::KernelRuntimeContext ctx(nullptr, &temp_allocator);
+
+  executorch::aten::SizesType sizes[3] = {1, 2, 3};
+  executorch::aten::DimOrderType dim_order[3] = {0, 1, 2};
+  executorch::aten::StridesType strides[3] = {3, 3, 1};
+
+  torch::executor::ArrayRef<executorch::aten::SizesType> sizes_ref(sizes, 3);
+  torch::executor::ArrayRef<executorch::aten::StridesType> strides_ref(strides, 3);
+  torch::executor::ArrayRef<executorch::aten::DimOrderType> dim_orders_ref(
+      dim_order, 3);
+
+  torch::executor::allocate_tensor(
+      ctx, sizes_ref, dim_orders_ref, strides_ref, ScalarType::Float);
+
+  free(temp_allocator_ptr);
+}
+
+TEST_F(AllocateTest, FailAllocateTensor) {
+  torch::executor::runtime_init();
+
+  uint8_t* temp_allocator_ptr = (uint8_t*)malloc(16);
+  executorch::runtime::MemoryAllocator temp_allocator(16, temp_allocator_ptr);
+  executorch::runtime::KernelRuntimeContext ctx(nullptr, &temp_allocator);
+
+  executorch::aten::SizesType sizes[3] = {1, 2, 3};
+  executorch::aten::DimOrderType dim_order[3] = {0, 1, 2};
+  executorch::aten::StridesType strides[3] = {3, 3, 1};
+
+  torch::executor::ArrayRef<executorch::aten::SizesType> sizes_ref(sizes, 3);
+  torch::executor::ArrayRef<executorch::aten::StridesType> strides_ref(strides, 3);
+  torch::executor::ArrayRef<executorch::aten::DimOrderType> dim_orders_ref(
+      dim_order, 3);
+
+  ET_EXPECT_DEATH(
+      torch::executor::allocate_tensor(
+          ctx, sizes_ref, dim_orders_ref, strides_ref, ScalarType::Float),
+      "Failed to malloc");
+
+  free(temp_allocator_ptr);
+}
diff --git 
a/kernels/portable/cpu/util/test/targets.bzl b/kernels/portable/cpu/util/test/targets.bzl index 28988b90dcc..39ac40fa603 100644 --- a/kernels/portable/cpu/util/test/targets.bzl +++ b/kernels/portable/cpu/util/test/targets.bzl @@ -21,3 +21,13 @@ def define_common_targets(): "//executorch/kernels/portable/cpu/util:reduce_util", ], ) + + runtime.cxx_test( + name = "allocate_tensor_test", + srcs = ["allocate_tensor_test.cpp"], + deps = [ + "//executorch/runtime/core/exec_aten:lib", + "//executorch/kernels/portable/cpu/util:allocate_tensor_util", + "//executorch/runtime/kernel:kernel_includes", + ], + ) From e52a09f85a9d4eef65ea5c162d5b39575bbfeafe Mon Sep 17 00:00:00 2001 From: Tarun Karuturi Date: Wed, 9 Oct 2024 16:20:36 -0700 Subject: [PATCH 2/2] Add sort util for 1D tensors (#2786) Summary: Pull Request resolved: https://github.com/pytorch/executorch/pull/2786 This diff adds a simple sort utility that sorts a tensor's values and returns the sorted values and the sorted indices in the out tensors that are provided. There are currently two limitations to this sort: - It only supports 1D tensors currently, has to be extended to support 2D and greater tensors. - Input types are assumed to be float and it currently asserts on that. This has to be templatized to support all dtypes. 
Reviewed By: iseeyuan Differential Revision: D55577025 --- kernels/portable/cpu/util/sort_util.cpp | 73 +++++++++++++++++++ kernels/portable/cpu/util/sort_util.h | 25 +++++++ kernels/portable/cpu/util/targets.bzl | 11 +++ .../portable/cpu/util/test/sort_util_test.cpp | 45 ++++++++++++ kernels/portable/cpu/util/test/targets.bzl | 8 ++ 5 files changed, 162 insertions(+) create mode 100644 kernels/portable/cpu/util/sort_util.cpp create mode 100644 kernels/portable/cpu/util/sort_util.h create mode 100644 kernels/portable/cpu/util/test/sort_util_test.cpp diff --git a/kernels/portable/cpu/util/sort_util.cpp b/kernels/portable/cpu/util/sort_util.cpp new file mode 100644 index 00000000000..c57053a5088 --- /dev/null +++ b/kernels/portable/cpu/util/sort_util.cpp @@ -0,0 +1,73 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include "executorch/kernels/portable/cpu/util/sort_util.h" +#include +#include + +namespace torch { +namespace executor { + +using Tensor = exec_aten::Tensor; + +Error sort_tensor( + const Tensor& tensor, + Tensor& sorted_tensor, + Tensor& sorted_indices, + bool descending) { + // Check if the input tensor is a valid input + ET_CHECK_MSG(tensor.dim() == 1, "Input tensor must be 1D"); + + // Check if the output tensors are valid + ET_CHECK_MSG(sorted_tensor.dim() == 1, "Output tensor must be 1D"); + ET_CHECK_MSG(sorted_indices.dim() == 1, "Output tensor must be 1D"); + + // Check if the output tensors have the same dtype + ET_CHECK_MSG( + tensor.scalar_type() == sorted_tensor.scalar_type(), + "Input and output tensors must have the same dtype"); + ET_CHECK_MSG( + tensor.scalar_type() == ScalarType::Float, + "Only float inputs are supported currently"); + ET_CHECK_MSG( + sorted_indices.scalar_type() == exec_aten::ScalarType::Long, + "Output tensor must be of type int64"); + + // Get the number of elements in the tensor + int size = tensor.numel(); + + // Create a tensor to store the indices + for (int i = 0; i < size; i++) { + sorted_indices.mutable_data_ptr()[i] = i; + } + + // Sort the indices based on the corresponding tensor values + std::sort( + sorted_indices.mutable_data_ptr(), + sorted_indices.mutable_data_ptr() + size, + [&tensor, descending](int64_t i, int64_t j) { + if (descending) { + return tensor.const_data_ptr()[i] > + tensor.const_data_ptr()[j]; + } else { + return tensor.const_data_ptr()[i] < + tensor.const_data_ptr()[j]; + } + }); + + // Rearrange the tensor values based on the sorted indices + for (int i = 0; i < size; i++) { + sorted_tensor.mutable_data_ptr()[i] = tensor.const_data_ptr< + float>()[sorted_indices.const_data_ptr()[i]]; + } + + return Error::Ok; +} + +} // namespace executor +} // namespace torch diff --git a/kernels/portable/cpu/util/sort_util.h b/kernels/portable/cpu/util/sort_util.h new file mode 100644 index 
00000000000..9095490b327 --- /dev/null +++ b/kernels/portable/cpu/util/sort_util.h @@ -0,0 +1,25 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include + +namespace torch { +namespace executor { + +using Tensor = exec_aten::Tensor; + +Error sort_tensor( + const Tensor& tensor, + Tensor& sorted_tensor, + Tensor& sorted_indice, + bool descending = false); + +} // namespace executor +} // namespace torch diff --git a/kernels/portable/cpu/util/targets.bzl b/kernels/portable/cpu/util/targets.bzl index 3ee3ceff6dc..7212915c5f9 100644 --- a/kernels/portable/cpu/util/targets.bzl +++ b/kernels/portable/cpu/util/targets.bzl @@ -247,6 +247,17 @@ def define_common_targets(): visibility = ["//executorch/kernels/portable/cpu/..."], ) + runtime.cxx_library( + name = "sort_util", + srcs = ["sort_util.cpp"], + exported_headers = ["sort_util.h"], + deps = [ + "//executorch/runtime/kernel:kernel_includes", + "//executorch/runtime/core/exec_aten/util:tensor_util", + ], + visibility = ["//executorch/kernels/portable/cpu/...", "//executorch/kernels/torchvision/..."], + ) + # Utility functions that can be used by operators that perform reduction for aten_mode in [True, False]: suffix = "_aten" if aten_mode else "" diff --git a/kernels/portable/cpu/util/test/sort_util_test.cpp b/kernels/portable/cpu/util/test/sort_util_test.cpp new file mode 100644 index 00000000000..e5dbfbd4b30 --- /dev/null +++ b/kernels/portable/cpu/util/test/sort_util_test.cpp @@ -0,0 +1,45 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */
+
+#include <executorch/kernels/portable/cpu/util/sort_util.h>
+#include <executorch/runtime/core/exec_aten/exec_aten.h>
+#include <executorch/runtime/core/exec_aten/testing_util/tensor_factory.h>
+#include <executorch/runtime/core/exec_aten/testing_util/tensor_util.h>
+#include <executorch/runtime/platform/runtime.h>
+
+#include <gtest/gtest.h>
+
+using namespace ::testing;
+using exec_aten::ScalarType;
+using exec_aten::Tensor;
+using torch::executor::ArrayRef;
+using torch::executor::testing::TensorFactory;
+
+TEST(SortUtilTest, SortTensorTest) {
+  TensorFactory<ScalarType::Float> tf;
+  TensorFactory<ScalarType::Long> lf;
+
+  Tensor a = tf.make({4}, {3, 2, 1, 4});
+  Tensor b = tf.zeros({4});
+  Tensor c = lf.zeros({4});
+
+  // Ascending order sort test
+  sort_tensor(a, b, c);
+
+  Tensor expected = tf.make({4}, {1, 2, 3, 4});
+  Tensor expected_indices = lf.make({4}, {2, 1, 0, 3});
+  EXPECT_TENSOR_EQ(b, expected);
+  EXPECT_TENSOR_EQ(c, expected_indices);
+
+  // Descending order sort test
+  sort_tensor(a, b, c, true);
+  expected = tf.make({4}, {4, 3, 2, 1});
+  expected_indices = lf.make({4}, {3, 0, 1, 2});
+  EXPECT_TENSOR_EQ(b, expected);
+  EXPECT_TENSOR_EQ(c, expected_indices);
+}
diff --git a/kernels/portable/cpu/util/test/targets.bzl b/kernels/portable/cpu/util/test/targets.bzl
index 39ac40fa603..45687fd28bb 100644
--- a/kernels/portable/cpu/util/test/targets.bzl
+++ b/kernels/portable/cpu/util/test/targets.bzl
@@ -29,5 +29,15 @@ def define_common_targets():
             "//executorch/runtime/core/exec_aten:lib",
             "//executorch/kernels/portable/cpu/util:allocate_tensor_util",
             "//executorch/runtime/kernel:kernel_includes",
         ],
     )
+
+    runtime.cxx_test(
+        name = "sort_util_test",
+        srcs = ["sort_util_test.cpp"],
+        deps = [
+            "//executorch/runtime/core/exec_aten:lib",
+            "//executorch/runtime/core/exec_aten/testing_util:tensor_util",
+            "//executorch/kernels/portable/cpu/util:sort_util",
+        ],
+    )