Add stack & cat support for CPU Half (#16389)
Summary:
Fixes #6968

Needed for #14705
Pull Request resolved: #16389

Differential Revision: D13861446

Pulled By: gchanan

fbshipit-source-id: 7b8700b95aaf252d9669693dbddccb2302e58409
ssnl authored and facebook-github-bot committed Jan 29, 2019
1 parent d79e45b commit ded6fb0
Showing 7 changed files with 201 additions and 187 deletions.
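For context, a minimal Python sketch of what this change enables (illustrative only, not part of the diff; it assumes a build that includes this commit — before it, these calls were not implemented for Half tensors on the CPU):

import torch

a = torch.randn(2, 3).half()            # CPU float16 tensor
b = torch.randn(2, 3).half()

catted = torch.cat([a, b], dim=0)       # shape (4, 3), dtype torch.float16
stacked = torch.stack([a, b], dim=0)    # shape (2, 2, 3), dtype torch.float16
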
1 change: 1 addition & 0 deletions aten/src/ATen/Declarations.cwrap
@@ -2872,6 +2872,7 @@
name: _th_cat
cname: catArray
variants: [function]
cpu_half: True
return: self
arguments:
- arg: THTensor* self
125 changes: 125 additions & 0 deletions aten/src/TH/generic/THTensor.cpp
@@ -668,6 +668,131 @@ scalar_t THTensor_(get4d)(const THTensor *tensor, int64_t x0, int64_t x1, int64_
return THStorage_(get)(THTensor_getStoragePtr(tensor), tensor->storage_offset()+x0*tensor->stride(0)+x1*tensor->stride(1)+x2*tensor->stride(2)+x3*tensor->stride(3));
}


/* Shape manipulation methods */
void THTensor_(cat)(THTensor *r_, THTensor *ta, THTensor *tb, int dimension)
{
THTensor* inputs[2];
inputs[0] = ta;
inputs[1] = tb;
THTensor_(catArray)(r_, inputs, 2, dimension);
}

void THTensor_(check_shape_except_dim)(THTensor *first, THTensor *second, int dimension);
inline void THTensor_(check_shape_except_dim)(THTensor *first, THTensor *second, int dimension)
{
int first_dims = first->dim();
int second_dims = second->dim();
THArgCheck(first_dims == second_dims, 0,
"Tensors must have same number of dimensions: got %d and %d",
first_dims, second_dims);
for (int dim = 0; dim < first_dims; dim++) {
if (dim == dimension) {
continue;
}
int64_t first_dim_size = first->size(dim);
int64_t second_dim_size = second->size(dim);
THArgCheck(first_dim_size == second_dim_size, 0,
"Sizes of tensors must match except in dimension %d. Got %lld and %lld in dimension %d",
dimension, (long long)first_dim_size, (long long)second_dim_size, dim);
}
}

void THTensor_(catArray)(THTensor *result, THTensor **inputs, int numInputs, int dimension)
{
// previously, size [0] tensors were the only possible empty tensors; thus, it wasn't possible
// to cat empty tensors unless all the other tensors were 1-dimensional, so we allowed these tensors
// to be "skipped". We maintain this behavior for backwards compatibility, but only for this specific
// size (i.e. other empty sizes are not skipped).
// FIXME: warn if this is the case
bool allSkipped= true;
int64_t nDims = 0;
THTensor *notSkippedTensor; // non-owning reference
auto should_skip = [](THTensor *t) { return t->is_empty() && t->dim() == 1; };
for (int i = 0; i < numInputs; i++) {
if (should_skip(inputs[i])) {
continue;
}
// We've found a non-empty tensor
allSkipped = false;
notSkippedTensor = inputs[i];
nDims = notSkippedTensor->dim();
break;
}
if (allSkipped) {
return;
}

// Compute cat_dimension based on the non-empty tensor
THArgCheck(dimension < nDims, 4, "invalid dimension %d", dimension);
THArgCheck(numInputs > 0, 3, "invalid number of inputs %d", numInputs);

// Compute size of the result in the cat dimension
int64_t cat_dim_size = 0;
for (int i = 0; i < numInputs; i++) {
THTensor *tensor = inputs[i];
if (should_skip(tensor)) {
continue;
}
THTensor_(check_shape_except_dim)(notSkippedTensor, tensor, dimension);
cat_dim_size += tensor->size(dimension);
}

// Compute the size of the result
std::vector<int64_t> size(nDims);
for (int dim = 0; dim < nDims; dim++) {
int64_t result_dim_size = notSkippedTensor->size(dim);
if (dim == dimension) {
result_dim_size = cat_dim_size;
}
size[dim] = result_dim_size;
}
THTensor_(resize)(result, size, {});

// Check contiguity of all inputs and result
bool allContiguous = true;
for (int i = 0; i < numInputs; i++) {
if(!should_skip(inputs[i])) {
allContiguous = allContiguous && THTensor_(isContiguous)(inputs[i]);
}
}
allContiguous = allContiguous && THTensor_(isContiguous)(result);

// First path is for contiguous inputs along dim 0
// Second path for non-contiguous
int64_t offset;
if (dimension == 0 && allContiguous) {
scalar_t* result_data = THStorage_(data)(THTensor_getStoragePtr(result)) + result->storage_offset();
offset = 0;
for (int j = 0; j < numInputs; j++) {
if (!should_skip(inputs[j])) {
THTensor* input0 = inputs[j];
scalar_t* input0_data = THStorage_(data)(THTensor_getStoragePtr(input0)) + input0->storage_offset();
int64_t input0_size = THTensor_(nElement)(input0);
// C standard says you can't pass nullptrs to memcpy, even if the size is 0; ubsan checks this.
if (input0_size != 0) {
memcpy(result_data + offset, input0_data, input0_size*sizeof(scalar_t));
}
offset += input0_size;
}
}
} else {
offset = 0;
for (int j = 0; j < numInputs; j++) {
if (!should_skip(inputs[j])) {
int64_t dimSize = inputs[j]->size(dimension);
THTensor *nt = THTensor_(newWithTensor)(result);
THTensor_(narrow)(nt, NULL, dimension, offset, dimSize);
at::Tensor nt__wrap = THTensor_wrap(nt);
at::Tensor inputs_wrap = THTensor_wrap(inputs[j]);
at::_copy_same_type_(nt__wrap, inputs_wrap);
c10::raw::intrusive_ptr::decref(nt);
offset += dimSize;
}
}
}
}

THDescBuff THTensor_(desc)(const THTensor *tensor) {
const int L = TH_DESC_BUFF_LEN;
THDescBuff buf;
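The backwards-compatibility note at the top of THTensor_(catArray) above is easiest to see from Python. A hedged illustration of the behavior it describes (as observed on releases around this commit; only a 1-dimensional tensor of size [0] is skipped, other empty shapes still go through the shape check):

import torch

x = torch.randn(2, 3)
legacy_empty = torch.empty(0)           # 1-D, size [0]: skipped by catArray
y = torch.cat([x, legacy_empty], 0)     # no shape check against x; y is a copy of x, shape (2, 3)

other_empty = torch.empty(0, 3)         # size [0, 3]: not skipped
z = torch.cat([x, other_empty], 0)      # shapes checked except in dim 0; z has shape (2, 3)
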
4 changes: 4 additions & 0 deletions aten/src/TH/generic/THTensor.h
@@ -125,6 +125,10 @@ TH_API scalar_t THTensor_(get2d)(const THTensor *tensor, int64_t x0, int64_t x1)
TH_API scalar_t THTensor_(get3d)(const THTensor *tensor, int64_t x0, int64_t x1, int64_t x2);
TH_API scalar_t THTensor_(get4d)(const THTensor *tensor, int64_t x0, int64_t x1, int64_t x2, int64_t x3);

/* Shape manipulation methods */
TH_API void THTensor_(cat)(THTensor *r_, THTensor *ta, THTensor *tb, int dimension);
TH_API void THTensor_(catArray)(THTensor *result, THTensor **inputs, int numInputs, int dimension);

/* Debug methods */
TH_API THDescBuff THTensor_(desc)(const THTensor *tensor);
TH_API THDescBuff THTensor_(sizeDesc)(const THTensor *tensor);
2 changes: 0 additions & 2 deletions aten/src/TH/generic/THTensorMath.h
@@ -103,8 +103,6 @@ TH_API void THTensor_(randperm)(THTensor *r_, THGenerator *_generator, int64_t n
TH_API void THTensor_(sort)(THTensor *rt_, THLongTensor *ri_, THTensor *t, int dimension, int descendingOrder);
TH_API void THTensor_(topk)(THTensor *rt_, THLongTensor *ri_, THTensor *t, int64_t k, int dim, int dir, int sorted);
TH_API void THTensor_(triu)(THTensor *r_, THTensor *t, int64_t k);
TH_API void THTensor_(cat)(THTensor *r_, THTensor *ta, THTensor *tb, int dimension);
TH_API void THTensor_(catArray)(THTensor *result, THTensor **inputs, int numInputs, int dimension);

TH_API int THTensor_(equal)(THTensor *ta, THTensor *tb);

123 changes: 0 additions & 123 deletions aten/src/TH/generic/THTensorMoreMath.cpp
@@ -1238,129 +1238,6 @@ void THTensor_(triu)(THTensor *r_, THTensor *t, int64_t k)
}
}

void THTensor_(cat)(THTensor *r_, THTensor *ta, THTensor *tb, int dimension)
{
THTensor* inputs[2];
inputs[0] = ta;
inputs[1] = tb;
THTensor_(catArray)(r_, inputs, 2, dimension);
}

void THTensor_(check_shape_except_dim)(THTensor *first, THTensor *second, int dimension);
inline void THTensor_(check_shape_except_dim)(THTensor *first, THTensor *second, int dimension)
{
int first_dims = first->dim();
int second_dims = second->dim();
THArgCheck(first_dims == second_dims, 0,
"Tensors must have same number of dimensions: got %d and %d",
first_dims, second_dims);
for (int dim = 0; dim < first_dims; dim++) {
if (dim == dimension) {
continue;
}
int64_t first_dim_size = first->size(dim);
int64_t second_dim_size = second->size(dim);
THArgCheck(first_dim_size == second_dim_size, 0,
"Sizes of tensors must match except in dimension %d. Got %lld and %lld in dimension %d",
dimension, (long long)first_dim_size, (long long)second_dim_size, dim);
}
}

void THTensor_(catArray)(THTensor *result, THTensor **inputs, int numInputs, int dimension)
{
// previously, size [0] tensors were the only possible empty tensors; thus, it wasn't possible
// to cat empty tensors unless all the other tensors were 1-dimensional, so we allowed these tensors
// to be "skipped". We maintain this behavior for backwards compatibility, but only for this specific
// size (i.e. other empty sizes are not skipped).
// FIXME: warn if this is the case
bool allSkipped= true;
int64_t nDims = 0;
THTensor *notSkippedTensor; // non-owning reference
auto should_skip = [](THTensor *t) { return t->is_empty() && t->dim() == 1; };
for (int i = 0; i < numInputs; i++) {
if (should_skip(inputs[i])) {
continue;
}
// We've found a non-empty tensor
allSkipped = false;
notSkippedTensor = inputs[i];
nDims = notSkippedTensor->dim();
break;
}
if (allSkipped) {
return;
}

// Compute cat_dimension based on the non-empty tensor
THArgCheck(dimension < nDims, 4, "invalid dimension %d", dimension);
THArgCheck(numInputs > 0, 3, "invalid number of inputs %d", numInputs);

// Compute size of the result in the cat dimension
int64_t cat_dim_size = 0;
for (int i = 0; i < numInputs; i++) {
THTensor *tensor = inputs[i];
if (should_skip(tensor)) {
continue;
}
THTensor_(check_shape_except_dim)(notSkippedTensor, tensor, dimension);
cat_dim_size += tensor->size(dimension);
}

// Compute the size of the result
std::vector<int64_t> size(nDims);
for (int dim = 0; dim < nDims; dim++) {
int64_t result_dim_size = notSkippedTensor->size(dim);
if (dim == dimension) {
result_dim_size = cat_dim_size;
}
size[dim] = result_dim_size;
}
THTensor_(resize)(result, size, {});

// Check contiguity of all inputs and result
bool allContiguous = true;
for (int i = 0; i < numInputs; i++) {
if(!should_skip(inputs[i])) {
allContiguous = allContiguous && THTensor_(isContiguous)(inputs[i]);
}
}
allContiguous = allContiguous && THTensor_(isContiguous)(result);

// First path is for contiguous inputs along dim 0
// Second path for non-contiguous
int64_t offset;
if (dimension == 0 && allContiguous) {
scalar_t* result_data = THStorage_(data)(THTensor_getStoragePtr(result)) + result->storage_offset();
offset = 0;
for (int j = 0; j < numInputs; j++) {
if (!should_skip(inputs[j])) {
THTensor* input0 = inputs[j];
scalar_t* input0_data = THStorage_(data)(THTensor_getStoragePtr(input0)) + input0->storage_offset();
int64_t input0_size = THTensor_(nElement)(input0);
// C standard says you can't pass nullptrs to memcpy, even if the size is 0; ubsan checks this.
if (input0_size != 0) {
memcpy(result_data + offset, input0_data, input0_size*sizeof(scalar_t));
}
offset += input0_size;
}
}
} else {
offset = 0;
for (int j = 0; j < numInputs; j++) {
if (!should_skip(inputs[j])) {
int64_t dimSize = inputs[j]->size(dimension);
THTensor *nt = THTensor_(newWithTensor)(result);
THTensor_(narrow)(nt, NULL, dimension, offset, dimSize);
at::Tensor nt__wrap = THTensor_wrap(nt);
at::Tensor inputs_wrap = THTensor_wrap(inputs[j]);
at::_copy_same_type_(nt__wrap, inputs_wrap);
c10::raw::intrusive_ptr::decref(nt);
offset += dimSize;
}
}
}
}

int THTensor_(equal)(THTensor *ta, THTensor* tb)
{
int equal = 1;
36 changes: 21 additions & 15 deletions test/common_utils.py
@@ -392,22 +392,28 @@ def assertEqual(self, x, y, prec=None, message='', allow_inf=False):
def assertTensorsEqual(a, b):
super(TestCase, self).assertEqual(a.size(), b.size(), message)
if a.numel() > 0:
b = b.type_as(a)
b = b.cuda(device=a.get_device()) if a.is_cuda else b.cpu()
# check that NaNs are in the same locations
nan_mask = a != a
self.assertTrue(torch.equal(nan_mask, b != b), message)
if a.device.type == 'cpu' and a.dtype == torch.float16:
# CPU half tensors don't have the methods we need below
a = a.to(torch.float32)
if TEST_WITH_ROCM:
# Workaround for bug https://github.com/pytorch/pytorch/issues/16448
# TODO: remove after the bug is resolved.
b = b.to(a.dtype).to(a.device)
else:
b = b.to(a)
diff = a - b
diff[nan_mask] = 0
# inf check if allow_inf=True
if allow_inf:
inf_mask = (a == float("inf")) | (a == float("-inf"))
self.assertTrue(torch.equal(inf_mask,
(b == float("inf")) | (b == float("-inf"))),
message)
diff[inf_mask] = 0
# TODO: implement abs on CharTensor
if diff.is_signed() and 'CharTensor' not in diff.type():
if a.is_floating_point():
# check that NaNs are in the same locations
nan_mask = torch.isnan(a)
self.assertTrue(torch.equal(nan_mask, torch.isnan(b)), message)
diff[nan_mask] = 0
# inf check if allow_inf=True
if allow_inf:
inf_mask = torch.isinf(a)
self.assertTrue(torch.equal(inf_mask, torch.isinf(b)), message)
diff[inf_mask] = 0
# TODO: implement abs on CharTensor (int8)
if diff.is_signed() and diff.dtype != torch.int8:
diff = diff.abs()
max_err = diff.max()
self.assertLessEqual(max_err, prec, message)
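
A standalone sketch of the comparison logic that the updated assertTensorsEqual performs (simplified and illustrative — the helper name and return-based structure are not part of this PR, and the ROCm workaround branch is omitted):

import torch

def tensors_close(a, b, prec, allow_inf=False):
    # Simplified mirror of assertTensorsEqual: compare sizes, then values.
    if a.size() != b.size():
        return False
    if a.numel() == 0:
        return True
    if a.device.type == 'cpu' and a.dtype == torch.float16:
        # CPU half tensors don't support some of the ops used below
        a = a.to(torch.float32)
    b = b.to(a)                           # match a's dtype and device
    diff = a - b
    if a.is_floating_point():
        # NaNs (and, with allow_inf, infs) must be in the same locations
        nan_mask = torch.isnan(a)
        if not torch.equal(nan_mask, torch.isnan(b)):
            return False
        diff[nan_mask] = 0
        if allow_inf:
            inf_mask = torch.isinf(a)
            if not torch.equal(inf_mask, torch.isinf(b)):
                return False
            diff[inf_mask] = 0
    # abs is not implemented for int8 (CharTensor)
    if diff.is_signed() and diff.dtype != torch.int8:
        diff = diff.abs()
    return diff.max().item() <= prec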