Implement kthvalue in ATen #17544

Closed · wants to merge 4 commits
21 changes: 0 additions & 21 deletions aten/src/ATen/Declarations.cwrap
@@ -800,27 +800,6 @@
- arg: bool keepdim
default: "false"
]]
[[
name: _th_kthvalue
backends:
- CPU
variants: function
cname: kthvalue
return: argument 0,1
scalar_check: self_->dim() == 0 || (keepdim == false && self_->dim() == 1)
arguments:
- arg: THTensor* values
output: True
- arg: THIndexTensor* indices
output: True
- THTensor* self
- long k
- arg: long dim
wrap_dim: self
default: __last_dim
- arg: bool keepdim
default: "false"
]]
[[
name: _th_mode
variants: function
195 changes: 195 additions & 0 deletions aten/src/ATen/native/Sorting.cpp
@@ -0,0 +1,195 @@
#include <ATen/ATen.h>
#include <ATen/Parallel.h>
#include <ATen/WrapDimUtils.h>
#include <ATen/native/SortingUtils.h>

namespace at {
namespace native {

namespace {

// maybe these days, one should define a random access iterator and use
// std::sort...
/* Note from TH:

I cut and pasted (slightly adapted) the quicksort code from
Sedgewick's 1978 "Implementing Quicksort Programs" article
http://www.csie.ntu.edu.tw/~b93076/p847-sedgewick.pdf

It is the state of the art existing implementation. The macros
are here to make as close a match as possible to the pseudocode of
Program 2 p.851

Note that other partition schemes exist, and are typically presented
in textbooks, but those are less efficient. See e.g.
http://cs.stackexchange.com/questions/11458/quicksort-partitioning-hoare-vs-lomuto

Julien, November 12th 2013
*/

constexpr int64_t MAX_LEVELS = 300;
constexpr int64_t M_SMALL = 10; // Limit for small subfiles

template <typename Fn>
void dim_apply(TensorList tensors, int64_t dim, Fn f) {
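// Iterate (in parallel) over every 1-d slice of `tensors` taken along `dim`.
// `f` is called with the flat slice index and the narrowed views, so
// companion tensors (values, indices, ...) stay aligned slice by slice.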
AT_ASSERT(tensors.size() > 0);
auto t = tensors[0];
auto sizes = t.sizes();
int64_t ndim = t.dim();
int64_t itersize = 1;
for (int64_t i = 0; i < ndim; i++) {
if (i != dim) {
itersize *= t.size(i);
}
}
parallel_for(0, itersize, 1, [&](int64_t i_begin, int64_t i_end) {
std::vector<Tensor> narrowed_tensors;
narrowed_tensors.reserve(tensors.size());
for (int64_t it = i_begin; it < i_end; it++) {
narrowed_tensors.clear();
for (auto ti : tensors) {
int64_t i = it;
Tensor nt = ti;
for (int64_t d = 0; d < ndim; d++) {
if (d != dim) {
// this could be avoided for slower-changing dimensions if done
// better
nt = nt.select((d > dim ? 1 : 0), i % sizes[d]);
i = i / sizes[d];
}
}
narrowed_tensors.emplace_back(nt);
}
f(it, narrowed_tensors);
}
});
}

template <typename scalar_t, typename Fn>
void quick_select_template(
TensorAccessor<scalar_t, 1> arr,
int64_t k,
Fn swap_fn) {
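// Classic quickselect with a median-of-three pivot (see the note above):
// on return, arr[k] holds the k-th smallest element (k is 0-based), with
// no larger element to its left and no smaller element to its right.
// Every swap goes through swap_fn so the caller can permute a companion
// index array in lockstep with the values.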
int64_t P, L, R, i, j;
scalar_t piv;
L = 0;
R = arr.size(0) - 1;

do {
if (R <= L) // One element only
return;

if (R == L + 1) { // Two elements only
if (arr[L] > arr[R]) {
swap_fn(L, R);
}
return;
}

// Use median of three for pivot choice
P = (L + R) >> 1;
swap_fn(P, L + 1);
if (arr[L + 1] > arr[R]) {
swap_fn(L + 1, R);
}
if (arr[L] > arr[R]) {
swap_fn(L, R);
}
if (arr[L + 1] > arr[L]) {
swap_fn(L + 1, L);
}

i = L + 1;
j = R;
piv = arr[L];
do {
do
i++;
while (arr[i] < piv);
do
j--;
while (arr[j] > piv);
if (j < i)
break;
swap_fn(i, j);
} while (1);
swap_fn(L, j);

// Re-set active partition
if (j <= k)
L = i;
if (j >= k)
R = j - 1;
} while (1);
}

} // namespace

std::tuple<Tensor&, Tensor&> kthvalue_out_cpu(
Tensor& values,
Tensor& indices,
const Tensor& self,
int64_t k,
int64_t dim_,
bool keepdim) {
int64_t dim = maybe_wrap_dim(dim_, self.dim(), /*wrap_scalar=*/true);
// FIXME: This seems bogus; I only do this because it was the old behaviour.
// The reductions are fine, as long as the axis being reduced along
// isn't of 0 elements (and the output has elements).
AT_CHECK(
self.numel() > 0,
"cannot perform reduction function kthvalue",
" on tensor with no elements because the operation does not have an identity");
AT_CHECK(
k > 0 && k <= (self.dim() > 0 ? self.size(dim) : 1),
"selected index k out of range");

_reduction_with_indices_allocate_or_resize_output(
values, indices, self, dim_, keepdim);
if (self.dim() == 0 && self.numel() == 1) {
values.copy_(self);
indices.zero_();
return std::forward_as_tuple(values, indices);
}
auto tmp_values = self.clone();
auto tmp_indices = at::empty(self.sizes(), self.options().dtype(kLong));
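// For every 1-d slice along `dim`: fill tmp_indices with 0..n-1, run
// quickselect over the tmp_values slice (swapping the index slice in
// lockstep), then copy element k-1 of both into the output slice.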
AT_DISPATCH_ALL_TYPES(self.type(), "kthvalue", [&] {
dim_apply(
{tmp_values, tmp_indices, values, indices},
dim,
[&](int64_t i, TensorList tl) {
auto tmp_values = tl[0].accessor<scalar_t, 1>();
auto tmp_indices = tl[1].accessor<int64_t, 1>();
scalar_t* mode_value = tl[2].data<scalar_t>();
int64_t* mode_index = tl[3].data<int64_t>();
for (int64_t j = 0; j < tmp_indices.size(0); j++) {
tmp_indices[j] = j;
}
quick_select_template(tmp_values, k - 1, [&](int64_t i, int64_t j) {
std::swap(tmp_values[i], tmp_values[j]);
std::swap(tmp_indices[i], tmp_indices[j]);
});
*mode_value = tmp_values[k - 1];
*mode_index = tmp_indices[k - 1];
});
});
if (!keepdim) {
values.squeeze_(dim);
indices.squeeze_(dim);
}
return std::forward_as_tuple(values, indices);
}

std::tuple<Tensor, Tensor> kthvalue(
const Tensor& self,
int64_t k,
int64_t dim,
bool keepdim) {
Tensor values = at::empty({0}, self.options());
Tensor indices = at::empty({0}, self.options().dtype(kLong));
at::kthvalue_out(values, indices, self, k, dim, keepdim);
return std::make_tuple(values, indices);
}

} // namespace native
} // namespace at
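
For orientation, a minimal usage sketch of the new native kthvalue from the C++ side (illustrative only, not part of this diff; it assumes an ATen build that includes this change and uses only existing factory functions):

#include <ATen/ATen.h>
#include <iostream>
#include <tuple>

int main() {
  // 0..7 reshaped to 2 x 4; each row is already sorted.
  at::Tensor x = at::arange(8, at::kFloat).reshape({2, 4});
  at::Tensor values, indices;
  std::tie(values, indices) =
      at::kthvalue(x, /*k=*/2, /*dim=*/1, /*keepdim=*/false);
  // values  -> [1., 5.]  (the 2nd-smallest entry of each row)
  // indices -> [1, 1]    (their positions along dim 1)
  std::cout << values << "\n" << indices << std::endl;
  return 0;
}
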
48 changes: 48 additions & 0 deletions aten/src/ATen/native/SortingUtils.h
@@ -0,0 +1,48 @@
#pragma once

#include <ATen/ATen.h>
#include <ATen/WrapDimUtils.h>

namespace at {
namespace native {

// ensure we get good values and indices for kthvalue, mode, median
// this will always be with the reducing dim as 1-d
static void _reduction_with_indices_allocate_or_resize_output(
Tensor& values,
Tensor& indices,
const Tensor& self,
int64_t dim_,
bool keepdim) {
int64_t dim = maybe_wrap_dim(dim_, self.dim(), /*wrap_scalar=*/true);
auto result_sizes = self.sizes().vec();
if (result_sizes.size() > 0) {
result_sizes[dim] = 1;
}
if (values.defined()) {
AT_CHECK(
self.type() == values.type(),
"output values must be of same type as input");
if (!keepdim && values.dim() == self.dim() - 1) {
// unsqueeze to preserve passed in noncontiguous tensor in resize
values.unsqueeze_(dim);
}
values.resize_(result_sizes);
} else {
values = at::empty(result_sizes, self.options());
}
if (indices.defined()) {
AT_CHECK(
indices.dtype() == kLong, "output indices must be of scalar type Long");
AT_CHECK(
indices.device() == self.device(),
"output indices must be on same device as input");
if (!keepdim && indices.dim() == self.dim() - 1) {
// unsqueeze to preserve passed in noncontiguous tensor in resize
indices.unsqueeze_(dim);
}
indices.resize_(result_sizes);
} else {
indices = at::empty(result_sizes, self.options().dtype(kLong));
}
}

} // namespace native
} // namespace at
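
To make the keepdim handling in the helper above concrete, a small illustrative snippet (again not part of the diff; it reuses the out-variant call pattern already used for kthvalue in this PR):

at::Tensor self = at::randn({2, 3, 4});
at::Tensor values = at::empty({0}, self.options());
at::Tensor indices = at::empty({0}, self.options().dtype(at::kLong));
at::kthvalue_out(values, indices, self, /*k=*/1, /*dim=*/1, /*keepdim=*/true);
// values.sizes() -> [2, 1, 4], indices.sizes() -> [2, 1, 4]
at::kthvalue_out(values, indices, self, /*k=*/1, /*dim=*/1, /*keepdim=*/false);
// The reduced dimension is squeezed away: values.sizes() -> [2, 4]
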
20 changes: 0 additions & 20 deletions aten/src/ATen/native/TensorCompare.cpp
@@ -97,26 +97,6 @@ Tensor _s_where_cpu(const Tensor& condition, const Tensor& self, const Tensor& o
return ret;
}

std::tuple<Tensor, Tensor> kthvalue(const Tensor& self, int64_t k, int64_t dim, bool keepdim) {
Tensor values = at::empty({0}, self.options());
Tensor indices = at::empty({0}, self.options().dtype(kLong));
return at::native::kthvalue_out(values, indices, self, k, dim, keepdim);
}

std::tuple<Tensor &,Tensor &> kthvalue_out(Tensor& values, Tensor& indices,
const Tensor& self, int64_t k, int64_t dim, bool keepdim) {
AT_CHECK(self.type().backend() == Backend::CPU || self.type().backend() == Backend::CUDA,
"kthvalue only supports CPU AND CUDA backend, got: ", toString(self.type().backend()));
dim = maybe_wrap_dim(dim, self.dim());
if (_dimreduce_return_trivial_no_ident(values, self, dim, keepdim, "kthvalue")) {
AT_ASSERT(values.dim() == 0);
indices.resize_({}).fill_(0);
return std::forward_as_tuple(values, indices);
} else {
return at::legacy::th::_th_kthvalue_out(values, indices, self, k, dim, keepdim);
}
}

std::tuple<Tensor, Tensor> median(const Tensor& self, int64_t dim, bool keepdim) {
Tensor values = at::empty({0}, self.options());
Tensor indices = at::empty({0}, self.options().dtype(kLong));