20 changes: 8 additions & 12 deletions kernels/portable/cpu/util/broadcast_util.cpp
@@ -18,17 +18,15 @@ namespace executor {
using Tensor = exec_aten::Tensor;
using ScalarType = exec_aten::ScalarType;

-void free_broadcast_tensor(const Tensor& broadcast_tensor) {
-  free((void*)broadcast_tensor.const_data_ptr());
-  free((void*)broadcast_tensor.sizes().data());
-  free((void*)broadcast_tensor.dim_order().data());
-  free((void*)broadcast_tensor.strides().data());
-  free(broadcast_tensor.unsafeGetTensorImpl());
+void free_tensor(const Tensor& tensor) {
+  free((void*)tensor.const_data_ptr());
+  free((void*)tensor.sizes().data());
+  free((void*)tensor.dim_order().data());
+  free((void*)tensor.strides().data());
+  free(tensor.unsafeGetTensorImpl());
}

-namespace {
Contributor:
It's interesting that malloc is used in this function, but I don't see how the malloc'ed pointer is freed. @SS-JIA @manuelcandales, could you provide more context that I'm missing? cc @dbort

Contributor Author:
There's a corresponding function, free_tensor, that frees the tensor created via this function when it is passed in.

Contributor:
@tarun292 These functions are not being used in the portable kernels. I believe you introduced these utils back in January 2023, and maybe they were being used at that time, but not anymore. They should be moved out of the portable lib utils.

-Tensor make_tensor(
+Tensor allocate_tensor(
const ArrayRef<Tensor::SizesType>& sizes,
const ArrayRef<Tensor::DimOrderType>& dim_order,
const ArrayRef<Tensor::StridesType>& strides,
@@ -73,8 +71,6 @@ Tensor make_tensor(
return Tensor{tensor_impl};
}

-} // namespace
bool tensor_is_broadcastable_to(
const exec_aten::ArrayRef<Tensor::SizesType> broadcast_from_shape,
const exec_aten::ArrayRef<Tensor::SizesType> broadcast_to_shape) {
Expand Down Expand Up @@ -171,7 +167,7 @@ Tensor broadcast_tensor(

// Once we have discovered that broadcast_from can be broadcasted into
// broadcast_to, use repeat() to do the broadcast.
-  Tensor out = make_tensor(
+  Tensor out = allocate_tensor(
broadcast_to_shape,
broadcast_to_dim_order,
broadcast_to_strides,
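To make the allocate/free contract discussed in the review thread concrete, here is a minimal usage sketch (illustrative 2x2 shape and strides; not part of this diff):

// Illustrative sketch of the allocate_tensor/free_tensor pairing; the shape,
// dim order, and strides here are hypothetical.
#include <executorch/kernels/portable/cpu/util/broadcast_util.h>

using exec_aten::ScalarType;
using torch::executor::Tensor;

void allocate_tensor_example() {
  Tensor::SizesType sizes[2] = {2, 2};
  Tensor::DimOrderType dim_order[2] = {0, 1};
  Tensor::StridesType strides[2] = {2, 1};

  // allocate_tensor() malloc()s the data buffer, the metadata arrays, and
  // the TensorImpl itself...
  Tensor t = torch::executor::allocate_tensor(
      {sizes, 2}, {dim_order, 2}, {strides, 2}, ScalarType::Float);

  // ...so every tensor it returns must be released with free_tensor(),
  // which free()s each of those allocations.
  torch::executor::free_tensor(t);
}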
33 changes: 26 additions & 7 deletions kernels/portable/cpu/util/broadcast_util.h
@@ -62,6 +62,23 @@ bool tensors_are_broadcastable_between(
*/
bool tensors_are_broadcastable_between(const Tensor& a, const Tensor& b);

+/**
+ * Create a new tensor with the given sizes, dim_order, and strides. Memory
+ * is dynamically allocated within this function and the tensor must be freed
+ * only using free_tensor.
+ *
+ * @param[in] sizes The sizes of the tensor.
+ * @param[in] dim_order The dim order of the tensor.
+ * @param[in] strides The strides of the tensor.
+ * @param[in] dtype The data type of the tensor.
+ * @returns A new tensor with the given sizes, dim_order, and strides.
+ */
+Tensor allocate_tensor(
+    const ArrayRef<Tensor::SizesType>& sizes,
+    const ArrayRef<Tensor::DimOrderType>& dim_order,
+    const ArrayRef<Tensor::StridesType>& strides,
+    const ScalarType& dtype);

/**
* DEPRECATED: Use `delinearize_index()` and `linearize_access_indexes()` for
* index remapping to avoid memory allocation.
@@ -75,7 +92,7 @@ bool tensors_are_broadcastable_between(const Tensor& a, const Tensor& b);
* @param[in] broadcast_to The tensor to which we want to broadcast to.
* @returns A new tensor with the same shape as broadcast_to and the data
* repeated as appropriate. This tensor contains dynamically allocated memory
- * and must be freed using free_broadcast_tensor.
+ * and must be freed using free_tensor.
*/
ET_DEPRECATED exec_aten::Tensor broadcast_tensor(
const exec_aten::Tensor& broadcast_from,
@@ -192,19 +209,21 @@ ET_NODISCARD inline Error resize_to_broadcast_target_size(
}

/**
* DEPRECATED: Use `delinearize_index()` and `linearize_access_indexes()` for
* index remapping to avoid memory allocation.
*
* Free the dynamically allocated memory in broadcast_tensor. This should only
* be used on a tensor returned by broadcast_tensor.
*
 * @param[in] The tensor that was previously returned by a call to
- * broadcast_tensor.
+ * allocate_tensor.
* @returns void
*/
ET_DEPRECATED void free_broadcast_tensor(
const exec_aten::Tensor& broadcast_tensor);

+/**
+ * Free the dynamically allocated memory in allocate_tensor. This should only
+ * be used on a tensor returned by allocate_tensor.
+ *
+ * @param[in] allocated_tensor The tensor that was previously returned by a
+ * call to allocate_tensor.
+ */
+void free_tensor(const exec_aten::Tensor& allocated_tensor);

/**
* Delinearize a flattened index to per-dimension indexes.
*
73 changes: 73 additions & 0 deletions kernels/portable/cpu/util/sort_util.cpp
@@ -0,0 +1,73 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#include "executorch/kernels/portable/cpu/util/sort_util.h"
#include <executorch/runtime/kernel/kernel_includes.h>
#include <algorithm>

namespace torch {
namespace executor {

using Tensor = exec_aten::Tensor;

Error sort_tensor(
    const Tensor& tensor,
    Tensor& sorted_tensor,
    Tensor& sorted_indices,
    bool descending) {
  // Check if the input tensor is a valid input
  ET_CHECK_MSG(tensor.dim() == 1, "Input tensor must be 1D");

  // Check if the output tensors are valid
  ET_CHECK_MSG(sorted_tensor.dim() == 1, "Output tensor must be 1D");
  ET_CHECK_MSG(sorted_indices.dim() == 1, "Output indices tensor must be 1D");

  // Check if the output tensors have the right dtypes
  ET_CHECK_MSG(
      tensor.scalar_type() == sorted_tensor.scalar_type(),
      "Input and output tensors must have the same dtype");
  ET_CHECK_MSG(
      tensor.scalar_type() == ScalarType::Float,
      "Only float inputs are supported currently");
  ET_CHECK_MSG(
      sorted_indices.scalar_type() == exec_aten::ScalarType::Long,
      "Output indices tensor must be of type Long");

  // Get the number of elements in the tensor
  const int size = tensor.numel();

  // Initialize the indices to the identity permutation 0, 1, ..., size - 1
  for (int i = 0; i < size; i++) {
    sorted_indices.mutable_data_ptr<int64_t>()[i] = i;
  }

  // Sort the indices based on the corresponding tensor values
  std::sort(
      sorted_indices.mutable_data_ptr<int64_t>(),
      sorted_indices.mutable_data_ptr<int64_t>() + size,
      [&tensor, descending](int64_t i, int64_t j) {
        if (descending) {
          return tensor.const_data_ptr<float>()[i] >
              tensor.const_data_ptr<float>()[j];
        } else {
          return tensor.const_data_ptr<float>()[i] <
              tensor.const_data_ptr<float>()[j];
        }
      });

  // Rearrange the tensor values based on the sorted indices
  for (int i = 0; i < size; i++) {
    sorted_tensor.mutable_data_ptr<float>()[i] = tensor.const_data_ptr<float>()
        [sorted_indices.const_data_ptr<int64_t>()[i]];
  }

  return Error::Ok;
}

} // namespace executor
} // namespace torch
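The body of sort_tensor above is a standard argsort-then-gather. For reference, a self-contained sketch of the same pattern in plain C++, independent of the ExecuTorch tensor types:

// Standalone illustration of the argsort-then-gather pattern used by
// sort_tensor(); plain std::vector stands in for the tensors.
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  std::vector<float> values = {3.f, 2.f, 1.f, 4.f};

  // Start from the identity permutation...
  std::vector<int64_t> indices(values.size());
  for (int64_t i = 0; i < static_cast<int64_t>(indices.size()); ++i) {
    indices[i] = i;
  }

  // ...sort the indices by the values they point at (ascending)...
  std::sort(indices.begin(), indices.end(), [&](int64_t i, int64_t j) {
    return values[i] < values[j];
  });

  // ...then gather the values in sorted order.
  std::vector<float> sorted(values.size());
  for (size_t i = 0; i < indices.size(); ++i) {
    sorted[i] = values[indices[i]];
  }

  for (float v : sorted) {
    std::printf("%g ", v); // prints: 1 2 3 4
  }
  return 0;
}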
25 changes: 25 additions & 0 deletions kernels/portable/cpu/util/sort_util.h
@@ -0,0 +1,25 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#pragma once

#include <executorch/runtime/core/error.h>
#include <executorch/runtime/core/exec_aten/exec_aten.h>

namespace torch {
namespace executor {

using Tensor = exec_aten::Tensor;

Error sort_tensor(
    const Tensor& tensor,
    Tensor& sorted_tensor,
    Tensor& sorted_indices,
    bool descending = false);

} // namespace executor
} // namespace torch
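Note that sort_tensor reports failures via its Error return value, and descending defaults to false. A hypothetical caller forwarding a descending sort might look like:

// Hypothetical caller sketch; sort_descending is not part of this diff.
#include <executorch/kernels/portable/cpu/util/sort_util.h>

using torch::executor::Error;
using torch::executor::Tensor;

Error sort_descending(const Tensor& in, Tensor& values, Tensor& indices) {
  // descending defaults to false (ascending); pass true for largest-first.
  return torch::executor::sort_tensor(in, values, indices, /*descending=*/true);
}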
11 changes: 11 additions & 0 deletions kernels/portable/cpu/util/targets.bzl
@@ -237,6 +237,17 @@ def define_common_targets():
visibility = ["//executorch/kernels/portable/cpu/..."],
)

    runtime.cxx_library(
        name = "sort_util",
        srcs = ["sort_util.cpp"],
        exported_headers = ["sort_util.h"],
        deps = [
            "//executorch/runtime/kernel:kernel_includes",
            "//executorch/runtime/core/exec_aten/util:tensor_util",
        ],
        visibility = [
            "//executorch/kernels/portable/cpu/...",
            "//executorch/kernels/torchvision/...",
        ],
    )

# Utility functions that can be used by operators that perform reduction
for aten_mode in [True, False]:
suffix = "_aten" if aten_mode else ""
Expand Down
8 changes: 4 additions & 4 deletions kernels/portable/cpu/util/test/broadcast_test.cpp
@@ -38,11 +38,11 @@ TEST(BroadcastUtilTest, BroadcastTensor) {

Tensor d = torch::executor::broadcast_tensor(a, c);
EXPECT_TENSOR_DATA_EQ(d, tf.make({2, 2}, {2, 2, 2, 2}));
-  torch::executor::free_broadcast_tensor(d);
+  torch::executor::free_tensor(d);

d = torch::executor::broadcast_tensor(b, c);
EXPECT_TENSOR_DATA_EQ(d, tf.make({2, 2}, {2, 2, 2, 2}));
-  torch::executor::free_broadcast_tensor(d);
+  torch::executor::free_tensor(d);
}

TEST(BroadcastUtilTest, BroadcastableBetween) {
@@ -69,12 +69,12 @@ TEST(BroadcastUtilTest, BroadcastableToFrom) {
ASSERT_TRUE(tensor_is_broadcastable_to(a, c));
Tensor d = torch::executor::broadcast_tensor(a, c);
EXPECT_TENSOR_DATA_EQ(d, tf.make({2, 2}, {2, 2, 2, 2}));
-  torch::executor::free_broadcast_tensor(d);
+  torch::executor::free_tensor(d);

ASSERT_TRUE(tensor_is_broadcastable_to(b, c));
d = torch::executor::broadcast_tensor(b, c);
EXPECT_TENSOR_DATA_EQ(d, tf.make({2, 2}, {2, 2, 2, 2}));
-  torch::executor::free_broadcast_tensor(d);
+  torch::executor::free_tensor(d);
}

TEST(BroadcastUtilTest, NotBroadcastableTo) {
45 changes: 45 additions & 0 deletions kernels/portable/cpu/util/test/sort_util_test.cpp
@@ -0,0 +1,45 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#include <executorch/kernels/portable/cpu/util/sort_util.h>
#include <executorch/runtime/core/exec_aten/testing_util/tensor_factory.h>
#include <executorch/runtime/core/exec_aten/testing_util/tensor_util.h>
#include <executorch/runtime/core/exec_aten/util/tensor_util.h>
#include <executorch/test/utils/DeathTest.h>

#include <gtest/gtest.h>

using namespace ::testing;
using exec_aten::ScalarType;
using exec_aten::Tensor;
using torch::executor::ArrayRef;
using torch::executor::testing::TensorFactory;

TEST(SortUtilTest, SortTensorTest) {
  TensorFactory<ScalarType::Float> tf;
  TensorFactory<ScalarType::Long> lf;

  Tensor a = tf.make({4}, {3, 2, 1, 4});
  Tensor b = tf.zeros({4});
  Tensor c = lf.zeros({4});

  // Ascending order sort test
  sort_tensor(a, b, c);

  Tensor expected = tf.make({4}, {1, 2, 3, 4});
  Tensor expected_indices = lf.make({4}, {2, 1, 0, 3});
  EXPECT_TENSOR_EQ(b, expected);
  EXPECT_TENSOR_EQ(c, expected_indices);

  // Descending order sort test
  sort_tensor(a, b, c, true);
  expected = tf.make({4}, {4, 3, 2, 1});
  expected_indices = lf.make({4}, {3, 0, 1, 2});
  EXPECT_TENSOR_EQ(b, expected);
  EXPECT_TENSOR_EQ(c, expected_indices);
}
10 changes: 10 additions & 0 deletions kernels/portable/cpu/util/test/targets.bzl
@@ -21,3 +21,13 @@ def define_common_targets():
"//executorch/kernels/portable/cpu/util:reduce_util",
],
)

    runtime.cxx_test(
        name = "sort_util_test",
        srcs = ["sort_util_test.cpp"],
        deps = [
            "//executorch/runtime/core/exec_aten:lib",
            "//executorch/runtime/core/exec_aten/testing_util:tensor_util",
            "//executorch/kernels/portable/cpu/util:sort_util",
        ],
    )