From 7d7d1d69822091d9d88809a69948f575721058df Mon Sep 17 00:00:00 2001 From: Manuel Candales Date: Wed, 5 Nov 2025 14:36:11 -0800 Subject: [PATCH] Enable fast path for negative indices Differential Revision: D86351194 --- kernels/portable/cpu/op_index.cpp | 28 +++++----------- kernels/test/op_index_test.cpp | 53 +++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+), 20 deletions(-) diff --git a/kernels/portable/cpu/op_index.cpp b/kernels/portable/cpu/op_index.cpp index 6ce9fb375de..d8eb992b85a 100644 --- a/kernels/portable/cpu/op_index.cpp +++ b/kernels/portable/cpu/op_index.cpp @@ -49,23 +49,6 @@ bool check_fast_path_conditions( if (index.dim() != 1) { return false; } - - // Fast path only supports non-negative indices. - if (ix_type == ScalarType::Int) { - const int32_t* const data = index.const_data_ptr<int32_t>(); - if (std::any_of(data, data + index.numel(), [](const auto x) { - return x < 0; - })) { - return false; - } - } else { // ScalarType::Long - const int64_t* const data = index.const_data_ptr<int64_t>(); - if (std::any_of(data, data + index.numel(), [](const auto x) { - return x < 0; - })) { - return false; - } - } } } @@ -96,8 +79,10 @@ bool check_fast_path_args( Long, Int, index.scalar_type(), ctx, "index.Tensor", CTYPE, [&]() { const CTYPE* const index_arr = index.const_data_ptr<CTYPE>(); for (const auto i : c10::irange(index.numel())) { - if (index_arr[i] < 0 || - index_arr[i] >= static_cast<CTYPE>(in.size(dim))) { + CTYPE index_val = index_arr[i]; + CTYPE dim_size = static_cast<CTYPE>(in.size(dim)); + index_val = index_val < 0 ? 
index_val + dim_size : index_val; + if (index_val < 0 || index_val >= dim_size) { ET_LOG( Error, "Index %" PRId64 @@ -189,11 +174,14 @@ Tensor& fast_path( ET_SWITCH_TWO_TYPES(Long, Int, index_type, ctx, op_name, CTYPE, [&]() { const CTYPE* const index_arr = index.const_data_ptr<CTYPE>(); + CTYPE dim_size = static_cast<CTYPE>(in.size(dim)); for (const auto i : c10::irange(leading_dims)) { const char* src = in_data + i * in_dim_length * length_per_step; char* dest = out_data + i * out_dim_length * length_per_step; for (const auto j : c10::irange(out_dim_length)) { - const char* copy_src = src + index_arr[j] * length_per_step; + auto index_val = + index_arr[j] < 0 ? index_arr[j] + dim_size : index_arr[j]; + const char* copy_src = src + index_val * length_per_step; char* copy_dest = dest + j * length_per_step; memcpy(copy_dest, copy_src, length_per_step); } diff --git a/kernels/test/op_index_test.cpp b/kernels/test/op_index_test.cpp index 787eb4612d8..8816d0a8d3f 100644 --- a/kernels/test/op_index_test.cpp +++ b/kernels/test/op_index_test.cpp @@ -947,3 +947,56 @@ TEST_F(OpIndexTensorOutTest, FastPathEmptyInput) { EXPECT_TENSOR_EQ(out, expected); } + +TEST_F(OpIndexTensorOutTest, FastPathNegativeIndex) { + TensorFactory<ScalarType::Double> tf; + TensorFactory<ScalarType::Long> tfl; + + // clang-format off + Tensor x = tf.make( {2, 3, 4}, { // [0, :, :] 1., 2., 3., 4., // [0, 0, :] 5., 6., 7., 8., // [0, 1, :] 9., 10., 11., 12., // [0, 2, :] // [1, :, :] -1., -2., -3., -4., // [1, 0, :] -5., -6., -7., -8., // [1, 1, :] -9., -10., -11., -12., // [1, 2, :] }); + // clang-format on + + // Use negative indices in the first dimension: -1, 0, -2 + std::array<optional<Tensor>, 3> indices = { + optional<Tensor>(tfl.make({3}, {-1, 0, -2})), + optional<Tensor>(), + optional<Tensor>()}; + + Tensor out = tf.zeros({3, 3, 4}); + // clang-format off + Tensor expected = tf.make( {3, 3, 4}, { // [1, :, :] -1., -2., -3., -4., // [1, 0, :] -5., -6., -7., -8., // [1, 1, :] -9., -10., -11., -12., // [1, 2, :] // [0, :, :] 1., 2., 3., 4., // 
[0, 0, :] + 5., 6., 7., 8., // [0, 1, :] + 9., 10., 11., 12., // [0, 2, :] + + // [0, :, :] again (since -2 wraps to 0) + 1., 2., 3., 4., // [0, 0, :] + 5., 6., 7., 8., // [0, 1, :] + 9., 10., 11., 12., // [0, 2, :] + }); + // clang-format on + + op_index_tensor_out(x, indices, out); + + EXPECT_TENSOR_EQ(out, expected); +}