diff --git a/.travis.yml b/.travis.yml index e14af9c0bcd..27afe1fd6f5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,11 +2,19 @@ dist: bionic # ubuntu 18.04 language: python python: - - "3.5" - "3.6" - "3.7" -env: CUDA=10.1.105-1 CUDA_SHORT=10.1 UBUNTU_VERSION=ubuntu1804 FORCE_CUDA=1 +env: + global: + - CUDA=10.1.105-1 + - CUDA_SHORT=10.1 + - UBUNTU_VERSION=ubuntu1804 + - FORCE_CUDA=1 + matrix: + - TORCH=1.3.1 TORCHVISION=0.4.2 CUDA_ARCH=6.0 + - TORCH=1.5.0 TORCHVISION=0.6.0 CUDA_ARCH=7.0 + cache: pip # Ref to CUDA installation in Travis: https://github.com/jeremad/cuda-travis @@ -25,7 +33,7 @@ before_install: install: - pip install Pillow==6.2.2 # remove this line when torchvision>=0.5 - - pip install torch==1.2 torchvision==0.4.0 # TODO: fix CI for pytorch>1.2 + - pip install torch==${TORCH} torchvision==${TORCHVISION} - pip install "git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI" - pip install -r requirements.txt @@ -36,7 +44,7 @@ before_script: script: - python setup.py check -m -s - - python setup.py build_ext --inplace + - TORCH_CUDA_ARCH_LIST="${CUDA_ARCH}" python setup.py build_ext --inplace - coverage run --branch --source mmdet -m py.test -v --xdoctest-modules tests mmdet after_success: diff --git a/mmdet/models/mask_heads/fcn_mask_head.py b/mmdet/models/mask_heads/fcn_mask_head.py index 30090d52bfc..62849a5824f 100644 --- a/mmdet/models/mask_heads/fcn_mask_head.py +++ b/mmdet/models/mask_heads/fcn_mask_head.py @@ -2,12 +2,12 @@ import pycocotools.mask as mask_util import torch import torch.nn as nn +import torch.nn.functional as F from torch.nn.modules.utils import _pair from mmdet.core import auto_fp16, force_fp32, mask_target from mmdet.ops import Conv2d, ConvModule, build_upsample_layer from mmdet.ops.carafe import CARAFEPack -from mmdet.ops.grid_sampler import grid_sample from ..builder import HEADS, build_loss BYTES_PER_FLOAT = 4 @@ -302,7 +302,7 @@ def _do_paste_mask(masks, boxes, img_h, img_w, skip_empty=True): gy = img_y[:, :, None].expand(N, img_y.size(1), img_x.size(1)) grid = torch.stack([gx, gy], dim=3) - img_masks = grid_sample( + img_masks = F.grid_sample( masks.to(dtype=torch.float32), grid, align_corners=False) if skip_empty: diff --git a/mmdet/ops/affine_grid/__init__.py b/mmdet/ops/affine_grid/__init__.py deleted file mode 100644 index 8530ade3384..00000000000 --- a/mmdet/ops/affine_grid/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .affine_grid import affine_grid - -__all__ = ['affine_grid'] diff --git a/mmdet/ops/affine_grid/affine_grid.py b/mmdet/ops/affine_grid/affine_grid.py deleted file mode 100644 index 7c24fa7991e..00000000000 --- a/mmdet/ops/affine_grid/affine_grid.py +++ /dev/null @@ -1,68 +0,0 @@ -import torch -import torch.nn.functional as F -from torch.autograd import Function -from torch.autograd.function import once_differentiable - -from . 
import affine_grid_ext - - -class _AffineGridGenerator(Function): - - @staticmethod - def forward(ctx, theta, size, align_corners): - - ctx.save_for_backward(theta) - ctx.size = size - ctx.align_corners = align_corners - - func = affine_grid_ext.affine_grid_generator_forward - - output = func(theta, size, align_corners) - - return output - - @staticmethod - @once_differentiable - def backward(ctx, grad_output): - theta = ctx.saved_tensors - size = ctx.size - align_corners = ctx.align_corners - - func = affine_grid_ext.affine_grid_generator_backward - - grad_input = func(grad_output, theta, size, align_corners) - - return grad_input, None, None - - -def affine_grid(theta, size, align_corners=False): - if torch.__version__ >= '1.3': - return F.affine_grid(theta, size, align_corners) - elif align_corners: - return F.affine_grid(theta, size) - else: - # enforce floating point dtype on theta - if not theta.is_floating_point(): - raise ValueError( - 'Expected theta to have floating point type, but got {}'. - format(theta.dtype)) - # check that shapes and sizes match - if len(size) == 4: - if theta.dim() != 3 or theta.size(-2) != 2 or theta.size(-1) != 3: - raise ValueError( - 'Expected a batch of 2D affine matrices of shape Nx2x3 ' - 'for size {}. Got {}.'.format(size, theta.shape)) - elif len(size) == 5: - if theta.dim() != 3 or theta.size(-2) != 3 or theta.size(-1) != 4: - raise ValueError( - 'Expected a batch of 3D affine matrices of shape Nx3x4 ' - 'for size {}. Got {}.'.format(size, theta.shape)) - else: - raise NotImplementedError( - 'affine_grid only supports 4D and 5D sizes, ' - 'for 2D and 3D affine transforms, respectively. ' - 'Got size {}.'.format(size)) - if min(size) <= 0: - raise ValueError( - 'Expected non-zero, positive output size. Got {}'.format(size)) - return _AffineGridGenerator.apply(theta, size, align_corners) diff --git a/mmdet/ops/affine_grid/src/affine_grid_ext.cpp b/mmdet/ops/affine_grid/src/affine_grid_ext.cpp deleted file mode 100644 index cc5c80d780c..00000000000 --- a/mmdet/ops/affine_grid/src/affine_grid_ext.cpp +++ /dev/null @@ -1,23 +0,0 @@ -// Modified from https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/native/AffineGridGenerator.cpp -#include -#include -#include - -namespace mmdetection { - -using namespace at; - -Tensor affine_grid_generator_forward(const Tensor &theta, IntArrayRef size, - bool align_corners); - -Tensor affine_grid_generator_backward(const Tensor &grad, IntArrayRef size, - bool align_corners); - -PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { -m.def("affine_grid_generator_forward", &affine_grid_generator_forward, -"affine_grid_generator_forward"); -m.def("affine_grid_generator_backward", &affine_grid_generator_backward, -"affine_grid_generator_backward"); -} - -} // namespace mmdetection diff --git a/mmdet/ops/affine_grid/src/cpu/affine_grid_cpu.cpp b/mmdet/ops/affine_grid/src/cpu/affine_grid_cpu.cpp deleted file mode 100644 index 51434604fd6..00000000000 --- a/mmdet/ops/affine_grid/src/cpu/affine_grid_cpu.cpp +++ /dev/null @@ -1,108 +0,0 @@ -// Modified from https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/native/AffineGridGenerator.cpp -#include -#include -#include - -namespace mmdetection { - -using namespace at; - -at::Tensor linspace_from_neg_one(const Tensor& grid, int64_t num_steps, - bool align_corners) { - if (num_steps <= 1) { - return at::tensor(0, grid.options()); - } - auto range = at::linspace(-1, 1, num_steps, grid.options()); - if (!align_corners) { - range = range * (num_steps - 1) / num_steps; - } - 
return range; -} - -Tensor make_base_grid_4D(const Tensor& theta, int64_t N, int64_t C, int64_t H, - int64_t W, bool align_corners) { - auto base_grid = at::empty({N, H, W, 3}, theta.options()); - - base_grid.select(-1, 0).copy_(linspace_from_neg_one(theta, W, align_corners)); - base_grid.select(-1, 1).copy_( - linspace_from_neg_one(theta, H, align_corners).unsqueeze_(-1)); - base_grid.select(-1, 2).fill_(1); - - return base_grid; -} - -Tensor make_base_grid_5D(const Tensor& theta, int64_t N, int64_t C, int64_t D, - int64_t H, int64_t W, bool align_corners) { - auto base_grid = at::empty({N, D, H, W, 4}, theta.options()); - - base_grid.select(-1, 0).copy_(linspace_from_neg_one(theta, W, align_corners)); - base_grid.select(-1, 1).copy_( - linspace_from_neg_one(theta, H, align_corners).unsqueeze_(-1)); - base_grid.select(-1, 2).copy_(linspace_from_neg_one(theta, D, align_corners) - .unsqueeze_(-1) - .unsqueeze_(-1)); - base_grid.select(-1, 3).fill_(1); - - return base_grid; -} - -Tensor affine_grid_generator_4D_forward(const Tensor& theta, int64_t N, - int64_t C, int64_t H, int64_t W, - bool align_corners) { - Tensor base_grid = make_base_grid_4D(theta, N, C, H, W, align_corners); - auto grid = base_grid.view({N, H * W, 3}).bmm(theta.transpose(1, 2)); - return grid.view({N, H, W, 2}); -} - -Tensor affine_grid_generator_5D_forward(const Tensor& theta, int64_t N, - int64_t C, int64_t D, int64_t H, - int64_t W, bool align_corners) { - Tensor base_grid = make_base_grid_5D(theta, N, C, D, H, W, align_corners); - auto grid = base_grid.view({N, D * H * W, 4}).bmm(theta.transpose(1, 2)); - return grid.view({N, D, H, W, 3}); -} - -Tensor affine_grid_generator_forward(const Tensor& theta, IntArrayRef size, - bool align_corners) { - if (size.size() == 4) { - return affine_grid_generator_4D_forward(theta, size[0], size[1], size[2], - size[3], align_corners); - } else { - return affine_grid_generator_5D_forward(theta, size[0], size[1], size[2], - size[3], size[4], align_corners); - } -} - -Tensor affine_grid_generator_4D_backward(const Tensor& grad_grid, int64_t N, - int64_t C, int64_t H, int64_t W, - bool align_corners) { - auto base_grid = make_base_grid_4D(grad_grid, N, C, H, W, align_corners); - AT_ASSERT(grad_grid.sizes() == IntArrayRef({N, H, W, 2})); - auto grad_theta = base_grid.view({N, H * W, 3}) - .transpose(1, 2) - .bmm(grad_grid.view({N, H * W, 2})); - return grad_theta.transpose(1, 2); -} - -Tensor affine_grid_generator_5D_backward(const Tensor& grad_grid, int64_t N, - int64_t C, int64_t D, int64_t H, - int64_t W, bool align_corners) { - auto base_grid = make_base_grid_5D(grad_grid, N, C, D, H, W, align_corners); - AT_ASSERT(grad_grid.sizes() == IntArrayRef({N, D, H, W, 3})); - auto grad_theta = base_grid.view({N, D * H * W, 4}) - .transpose(1, 2) - .bmm(grad_grid.view({N, D * H * W, 3})); - return grad_theta.transpose(1, 2); -} - -Tensor affine_grid_generator_backward(const Tensor& grad, IntArrayRef size, - bool align_corners) { - if (size.size() == 4) { - return affine_grid_generator_4D_backward(grad, size[0], size[1], size[2], - size[3], align_corners); - } else { - return affine_grid_generator_5D_backward(grad, size[0], size[1], size[2], - size[3], size[4], align_corners); - } -} -} // namespace mmdetection diff --git a/mmdet/ops/carafe/src/carafe_ext.cpp b/mmdet/ops/carafe/src/carafe_ext.cpp index 5bee3dafc11..7998ac2cd9a 100644 --- a/mmdet/ops/carafe/src/carafe_ext.cpp +++ b/mmdet/ops/carafe/src/carafe_ext.cpp @@ -22,7 +22,7 @@ int carafe_forward(at::Tensor features, at::Tensor 
rfeatures, at::Tensor masks, at::Tensor rmasks, int kernel_size, int group_size, int scale_factor, at::Tensor routput, at::Tensor output) { - if (features.type().is_cuda()) { + if (features.device().is_cuda()) { #ifdef WITH_CUDA return carafe_forward_cuda(features, rfeatures, masks, rmasks, kernel_size, group_size, scale_factor, routput, output); @@ -39,7 +39,7 @@ int carafe_backward(at::Tensor top_grad, at::Tensor rfeatures, at::Tensor rbottom_grad_hs, at::Tensor rbottom_grad, at::Tensor rmask_grad, at::Tensor bottom_grad, at::Tensor mask_grad) { - if (top_grad.type().is_cuda()) { + if (top_grad.device().is_cuda()) { #ifdef WITH_CUDA return carafe_backward_cuda(top_grad, rfeatures, masks, kernel_size, group_size, scale_factor, rtop_grad, rbottom_grad_hs, rbottom_grad, diff --git a/mmdet/ops/carafe/src/carafe_naive_ext.cpp b/mmdet/ops/carafe/src/carafe_naive_ext.cpp index 06fe912ad6d..357b8625df8 100644 --- a/mmdet/ops/carafe/src/carafe_naive_ext.cpp +++ b/mmdet/ops/carafe/src/carafe_naive_ext.cpp @@ -18,7 +18,7 @@ int carafe_naive_backward_cuda(at::Tensor top_grad, at::Tensor features, int carafe_naive_forward(at::Tensor features, at::Tensor masks, int kernel_size, int group_size, int scale_factor, at::Tensor output) { - if (features.type().is_cuda()) { + if (features.device().is_cuda()) { #ifdef WITH_CUDA return carafe_naive_forward_cuda(features, masks, kernel_size, group_size, scale_factor, output); @@ -33,7 +33,7 @@ int carafe_naive_backward(at::Tensor top_grad, at::Tensor features, at::Tensor masks, int kernel_size, int group_size, int scale_factor, at::Tensor bottom_grad, at::Tensor mask_grad) { - if (top_grad.type().is_cuda()) { + if (top_grad.device().is_cuda()) { #ifdef WITH_CUDA return carafe_naive_backward_cuda(top_grad, features, masks, kernel_size, group_size, scale_factor, bottom_grad, mask_grad); diff --git a/mmdet/ops/carafe/src/cuda/carafe_cuda.cpp b/mmdet/ops/carafe/src/cuda/carafe_cuda.cpp index 28d890f5451..59b536c027c 100644 --- a/mmdet/ops/carafe/src/cuda/carafe_cuda.cpp +++ b/mmdet/ops/carafe/src/cuda/carafe_cuda.cpp @@ -24,9 +24,9 @@ int CARAFEBackwardLaucher(const at::Tensor top_grad, const at::Tensor rfeatures, at::Tensor rmask_grad, at::Tensor bottom_grad, at::Tensor mask_grad); -#define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") +#define CHECK_CUDA(x) TORCH_CHECK(x.device().is_cuda(), #x, " must be a CUDAtensor ") #define CHECK_CONTIGUOUS(x) \ - AT_CHECK(x.is_contiguous(), #x, " must be contiguous ") + TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ") #define CHECK_INPUT(x) \ CHECK_CUDA(x); \ CHECK_CONTIGUOUS(x) diff --git a/mmdet/ops/carafe/src/cuda/carafe_cuda_kernel.cu b/mmdet/ops/carafe/src/cuda/carafe_cuda_kernel.cu index da62755067b..3a02a20f847 100644 --- a/mmdet/ops/carafe/src/cuda/carafe_cuda_kernel.cu +++ b/mmdet/ops/carafe/src/cuda/carafe_cuda_kernel.cu @@ -156,9 +156,9 @@ int CARAFEForwardLaucher(const at::Tensor features, const at::Tensor masks, // one warp per pixel cudaStream_t stream = at::cuda::getCurrentCUDAStream(); AT_DISPATCH_FLOATING_TYPES_AND_HALF( - features.type(), "NCHW2NHWC_Feature", ([&] { - const scalar_t *bottom_data = features.data(); - scalar_t *top_data = rfeatures.data(); + features.scalar_type(), "NCHW2NHWC_Feature", ([&] { + const scalar_t *bottom_data = features.data_ptr(); + scalar_t *top_data = rfeatures.data_ptr(); const int dh = divideUP(channels, kTileDim); const int dw = divideUP(input_height * input_width, kTileDim); BatchTranspose2DCUDAKernel @@ -167,9 +167,9 @@ int 
CARAFEForwardLaucher(const at::Tensor features, const at::Tensor masks, bottom_data, top_data); })); AT_DISPATCH_FLOATING_TYPES_AND_HALF( - features.type(), "NCHW2NHWC_Masks", ([&] { - const scalar_t *bottom_data = masks.data(); - scalar_t *top_data = rmasks.data(); + features.scalar_type(), "NCHW2NHWC_Masks", ([&] { + const scalar_t *bottom_data = masks.data_ptr(); + scalar_t *top_data = rmasks.data_ptr(); const int dh = divideUP(mask_channels, kTileDim); const int dw = divideUP(output_height * output_width, kTileDim); BatchTranspose2DCUDAKernel @@ -178,12 +178,12 @@ int CARAFEForwardLaucher(const at::Tensor features, const at::Tensor masks, bottom_data, top_data); })); AT_DISPATCH_FLOATING_TYPES_AND_HALF( - features.type(), "CARAFELaucherForward", ([&] { + features.scalar_type(), "CARAFELaucherForward", ([&] { const int num_kernels = batch_size * output_height * output_width * THREADS_PER_PIXEL; - const scalar_t *bottom_data = rfeatures.data(); - const scalar_t *bottom_masks = rmasks.data(); - scalar_t *top_data = routput.data(); + const scalar_t *bottom_data = rfeatures.data_ptr(); + const scalar_t *bottom_masks = rmasks.data_ptr(); + scalar_t *top_data = routput.data_ptr(); CARAFEForward <<(); - scalar_t *top_data = output.data(); + features.scalar_type(), "NHWC2NCHW", ([&] { + const scalar_t *bottom_data = routput.data_ptr(); + scalar_t *top_data = output.data_ptr(); const int dh = divideUP(output_height * output_width, kTileDim); const int dw = divideUP(channels, kTileDim); BatchTranspose2DCUDAKernel @@ -388,9 +388,9 @@ int CARAFEBackwardLaucher(const at::Tensor top_grad, const at::Tensor rfeatures, at::Tensor mask_grad) { cudaStream_t stream = at::cuda::getCurrentCUDAStream(); AT_DISPATCH_FLOATING_TYPES_AND_HALF( - top_grad.type(), "NCHW2NHWC_Top_Grad", ([&] { - const scalar_t *bottom_data = top_grad.data(); - scalar_t *top_data = rtop_grad.data(); + top_grad.scalar_type(), "NCHW2NHWC_Top_Grad", ([&] { + const scalar_t *bottom_data = top_grad.data_ptr(); + scalar_t *top_data = rtop_grad.data_ptr(); const int dh = divideUP(channels, kTileDim); const int dw = divideUP(output_height * output_width, kTileDim); BatchTranspose2DCUDAKernel @@ -400,12 +400,12 @@ int CARAFEBackwardLaucher(const at::Tensor top_grad, const at::Tensor rfeatures, })); AT_DISPATCH_FLOATING_TYPES_AND_HALF( - top_grad.type(), "CARAFELaucherBackward_Feature", ([&] { + top_grad.scalar_type(), "CARAFELaucherBackward_Feature", ([&] { const int num_kernels = batch_size * output_height * output_width * THREADS_PER_PIXEL; - const scalar_t *top_diff = rtop_grad.data(); - const scalar_t *bottom_masks = masks.data(); - scalar_t *bottom_diff = rbottom_grad_hs.data(); + const scalar_t *top_diff = rtop_grad.data_ptr(); + const scalar_t *bottom_masks = masks.data_ptr(); + scalar_t *bottom_diff = rbottom_grad_hs.data_ptr(); CARAFEBackward_Feature <<(); - scalar_t *bottom_diff = rbottom_grad.data(); + const scalar_t *bottom_diff_hs = rbottom_grad_hs.data_ptr(); + scalar_t *bottom_diff = rbottom_grad.data_ptr(); FeatureSum <<(); - scalar_t *top_data = bottom_grad.data(); + top_grad.scalar_type(), "NHWC2NCHW_Bottom_Grad", ([&] { + const scalar_t *bottom_data = rbottom_grad.data_ptr(); + scalar_t *top_data = bottom_grad.data_ptr(); const int dh = divideUP(input_height * input_width, kTileDim); const int dw = divideUP(channels, kTileDim); BatchTranspose2DCUDAKernel @@ -440,12 +440,12 @@ int CARAFEBackwardLaucher(const at::Tensor top_grad, const at::Tensor rfeatures, })); AT_DISPATCH_FLOATING_TYPES( - top_grad.type(), 
"CARAFELaucherBackward_Mask", ([&] { + top_grad.scalar_type(), "CARAFELaucherBackward_Mask", ([&] { const int num_kernels = batch_size * output_height * output_width * mask_channels * WARP_SIZE; - const scalar_t *top_diff = rtop_grad.data(); - const scalar_t *bottom_data = rfeatures.data(); - scalar_t *mask_diff = rmask_grad.data(); + const scalar_t *top_diff = rtop_grad.data_ptr(); + const scalar_t *bottom_data = rfeatures.data_ptr(); + scalar_t *mask_diff = rmask_grad.data_ptr(); CARAFEBackward_Mask <<(); - scalar_t *top_data = mask_grad.data(); + top_grad.scalar_type(), "NHWC2NCHW_Mask_Grad", ([&] { + const scalar_t *bottom_data = rmask_grad.data_ptr(); + scalar_t *top_data = mask_grad.data_ptr(); const int dh = divideUP(output_height * output_width, kTileDim); const int dw = divideUP(mask_channels, kTileDim); BatchTranspose2DCUDAKernel diff --git a/mmdet/ops/carafe/src/cuda/carafe_naive_cuda.cpp b/mmdet/ops/carafe/src/cuda/carafe_naive_cuda.cpp index 611f1d11471..394afd3ad06 100644 --- a/mmdet/ops/carafe/src/cuda/carafe_naive_cuda.cpp +++ b/mmdet/ops/carafe/src/cuda/carafe_naive_cuda.cpp @@ -18,9 +18,9 @@ int CARAFENAIVEBackwardLaucher(const at::Tensor top_grad, const int height, const int width, at::Tensor bottom_grad, at::Tensor mask_grad); -#define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") +#define CHECK_CUDA(x) TORCH_CHECK(x.device().is_cuda(), #x, " must be a CUDAtensor ") #define CHECK_CONTIGUOUS(x) \ - AT_CHECK(x.is_contiguous(), #x, " must be contiguous ") + TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ") #define CHECK_INPUT(x) \ CHECK_CUDA(x); \ CHECK_CONTIGUOUS(x) diff --git a/mmdet/ops/carafe/src/cuda/carafe_naive_cuda_kernel.cu b/mmdet/ops/carafe/src/cuda/carafe_naive_cuda_kernel.cu index 3edbae79481..9cf9855a71c 100644 --- a/mmdet/ops/carafe/src/cuda/carafe_naive_cuda_kernel.cu +++ b/mmdet/ops/carafe/src/cuda/carafe_naive_cuda_kernel.cu @@ -76,10 +76,10 @@ int CARAFENAIVEForwardLaucher(const at::Tensor features, const at::Tensor masks, const int width, at::Tensor output) { const int output_size = batch_size * channels * height * width; AT_DISPATCH_FLOATING_TYPES_AND_HALF( - features.type(), "CARAFENAIVELaucherForward", ([&] { - const scalar_t *bottom_data = features.data(); - const scalar_t *bottom_masks = masks.data(); - scalar_t *top_data = output.data(); + features.scalar_type(), "CARAFENAIVELaucherForward", ([&] { + const scalar_t *bottom_data = features.data_ptr(); + const scalar_t *bottom_masks = masks.data_ptr(); + scalar_t *top_data = output.data_ptr(); CARAFENAIVEForward <<>>( @@ -152,12 +152,12 @@ int CARAFENAIVEBackwardLaucher(const at::Tensor top_grad, const int output_size = batch_size * channels * height * width; AT_DISPATCH_FLOATING_TYPES_AND_HALF( - top_grad.type(), "CARAFENAIVELaucherBackward", ([&] { - const scalar_t *top_diff = top_grad.data(); - const scalar_t *bottom_data = features.data(); - const scalar_t *bottom_masks = masks.data(); - scalar_t *bottom_diff = bottom_grad.data(); - scalar_t *mask_diff = mask_grad.data(); + top_grad.scalar_type(), "CARAFENAIVELaucherBackward", ([&] { + const scalar_t *top_diff = top_grad.data_ptr(); + const scalar_t *bottom_data = features.data_ptr(); + const scalar_t *bottom_masks = masks.data_ptr(); + scalar_t *bottom_diff = bottom_grad.data_ptr(); + scalar_t *mask_diff = mask_grad.data_ptr(); CARAFENAIVEBackward <<>>( diff --git a/mmdet/ops/dcn/src/cuda/deform_conv_cuda.cpp b/mmdet/ops/dcn/src/cuda/deform_conv_cuda.cpp index 8601eb3b276..5d9424908ed 100644 --- 
a/mmdet/ops/dcn/src/cuda/deform_conv_cuda.cpp +++ b/mmdet/ops/dcn/src/cuda/deform_conv_cuda.cpp @@ -63,26 +63,26 @@ void shape_check(at::Tensor input, at::Tensor offset, at::Tensor *gradOutput, at::Tensor weight, int kH, int kW, int dH, int dW, int padH, int padW, int dilationH, int dilationW, int group, int deformable_group) { - AT_CHECK(weight.ndimension() == 4, + TORCH_CHECK(weight.ndimension() == 4, "4D weight tensor (nOutputPlane,nInputPlane,kH,kW) expected, " "but got: %s", weight.ndimension()); - AT_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous"); + TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous"); - AT_CHECK(kW > 0 && kH > 0, + TORCH_CHECK(kW > 0 && kH > 0, "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW); - AT_CHECK((weight.size(2) == kH && weight.size(3) == kW), + TORCH_CHECK((weight.size(2) == kH && weight.size(3) == kW), "kernel size should be consistent with weight, ", "but got kH: %d kW: %d weight.size(2): %d, weight.size(3): %d", kH, kW, weight.size(2), weight.size(3)); - AT_CHECK(dW > 0 && dH > 0, + TORCH_CHECK(dW > 0 && dH > 0, "stride should be greater than zero, but got dH: %d dW: %d", dH, dW); - AT_CHECK( + TORCH_CHECK( dilationW > 0 && dilationH > 0, "dilation should be greater than 0, but got dilationH: %d dilationW: %d", dilationH, dilationW); @@ -98,7 +98,7 @@ void shape_check(at::Tensor input, at::Tensor offset, at::Tensor *gradOutput, dimw++; } - AT_CHECK(ndim == 3 || ndim == 4, "3D or 4D input tensor expected but got: %s", + TORCH_CHECK(ndim == 3 || ndim == 4, "3D or 4D input tensor expected but got: %s", ndim); long nInputPlane = weight.size(1) * group; @@ -110,7 +110,7 @@ void shape_check(at::Tensor input, at::Tensor offset, at::Tensor *gradOutput, long outputWidth = (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; - AT_CHECK(nInputPlane % deformable_group == 0, + TORCH_CHECK(nInputPlane % deformable_group == 0, "input channels must divide deformable group size"); if (outputWidth < 1 || outputHeight < 1) @@ -120,27 +120,27 @@ void shape_check(at::Tensor input, at::Tensor offset, at::Tensor *gradOutput, nInputPlane, inputHeight, inputWidth, nOutputPlane, outputHeight, outputWidth); - AT_CHECK(input.size(1) == nInputPlane, + TORCH_CHECK(input.size(1) == nInputPlane, "invalid number of input planes, expected: %d, but got: %d", nInputPlane, input.size(1)); - AT_CHECK((inputHeight >= kH && inputWidth >= kW), + TORCH_CHECK((inputHeight >= kH && inputWidth >= kW), "input image is smaller than kernel"); - AT_CHECK((offset.size(2) == outputHeight && offset.size(3) == outputWidth), + TORCH_CHECK((offset.size(2) == outputHeight && offset.size(3) == outputWidth), "invalid spatial size of offset, expected height: %d width: %d, but " "got height: %d width: %d", outputHeight, outputWidth, offset.size(2), offset.size(3)); - AT_CHECK((offset.size(1) == deformable_group * 2 * kH * kW), + TORCH_CHECK((offset.size(1) == deformable_group * 2 * kH * kW), "invalid number of channels of offset"); if (gradOutput != NULL) { - AT_CHECK(gradOutput->size(dimf) == nOutputPlane, + TORCH_CHECK(gradOutput->size(dimf) == nOutputPlane, "invalid number of gradOutput planes, expected: %d, but got: %d", nOutputPlane, gradOutput->size(dimf)); - AT_CHECK((gradOutput->size(dimh) == outputHeight && + TORCH_CHECK((gradOutput->size(dimh) == outputHeight && gradOutput->size(dimw) == outputWidth), "invalid size of gradOutput, expected height: %d width: %d , but " "got height: %d width: %d", @@ -191,7 +191,7 @@ int 
deform_conv_forward_cuda(at::Tensor input, at::Tensor weight, long outputHeight = (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; - AT_CHECK((offset.size(0) == batchSize), "invalid batch size of offset"); + TORCH_CHECK((offset.size(0) == batchSize), "invalid batch size of offset"); output = output.view({batchSize / im2col_step, im2col_step, nOutputPlane, outputHeight, outputWidth}); @@ -298,7 +298,7 @@ int deform_conv_backward_input_cuda(at::Tensor input, at::Tensor offset, long outputHeight = (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; - AT_CHECK((offset.size(0) == batchSize), 3, "invalid batch size of offset"); + TORCH_CHECK((offset.size(0) == batchSize), 3, "invalid batch size of offset"); gradInput = gradInput.view({batchSize, nInputPlane, inputHeight, inputWidth}); columns = at::zeros( {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth}, @@ -414,7 +414,7 @@ int deform_conv_backward_parameters_cuda( long outputHeight = (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; - AT_CHECK((offset.size(0) == batchSize), "invalid batch size of offset"); + TORCH_CHECK((offset.size(0) == batchSize), "invalid batch size of offset"); columns = at::zeros( {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth}, @@ -494,8 +494,8 @@ void modulated_deform_conv_cuda_forward( const int pad_h, const int pad_w, const int dilation_h, const int dilation_w, const int group, const int deformable_group, const bool with_bias) { - AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); - AT_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous"); + TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); + TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous"); at::DeviceGuard guard(input.device()); const int batch = input.size(0); @@ -576,8 +576,8 @@ void modulated_deform_conv_cuda_backward( int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h, int pad_w, int dilation_h, int dilation_w, int group, int deformable_group, const bool with_bias) { - AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); - AT_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous"); + TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); + TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous"); at::DeviceGuard guard(input.device()); const int batch = input.size(0); diff --git a/mmdet/ops/dcn/src/cuda/deform_conv_cuda_kernel.cu b/mmdet/ops/dcn/src/cuda/deform_conv_cuda_kernel.cu index e7a26f2e830..98752dccf8c 100644 --- a/mmdet/ops/dcn/src/cuda/deform_conv_cuda_kernel.cu +++ b/mmdet/ops/dcn/src/cuda/deform_conv_cuda_kernel.cu @@ -258,9 +258,9 @@ void deformable_im2col( AT_DISPATCH_FLOATING_TYPES_AND_HALF( data_im.scalar_type(), "deformable_im2col_gpu", ([&] { - const scalar_t *data_im_ = data_im.data(); - const scalar_t *data_offset_ = data_offset.data(); - scalar_t *data_col_ = data_col.data(); + const scalar_t *data_im_ = data_im.data_ptr(); + const scalar_t *data_offset_ = data_offset.data_ptr(); + scalar_t *data_col_ = data_col.data_ptr(); deformable_im2col_gpu_kernel<<>>( num_kernels, data_im_, data_offset_, height, width, ksize_h, ksize_w, @@ -352,9 +352,9 @@ void deformable_col2im( AT_DISPATCH_FLOATING_TYPES_AND_HALF( data_col.scalar_type(), "deformable_col2im_gpu", ([&] { - const scalar_t *data_col_ = data_col.data(); - const scalar_t *data_offset_ = data_offset.data(); - scalar_t *grad_im_ = grad_im.data(); + const 
scalar_t *data_col_ = data_col.data_ptr(); + const scalar_t *data_offset_ = data_offset.data_ptr(); + scalar_t *grad_im_ = grad_im.data_ptr(); deformable_col2im_gpu_kernel<<>>( num_kernels, data_col_, data_offset_, channels, height, width, ksize_h, @@ -450,10 +450,10 @@ void deformable_col2im_coord( AT_DISPATCH_FLOATING_TYPES_AND_HALF( data_col.scalar_type(), "deformable_col2im_coord_gpu", ([&] { - const scalar_t *data_col_ = data_col.data(); - const scalar_t *data_im_ = data_im.data(); - const scalar_t *data_offset_ = data_offset.data(); - scalar_t *grad_offset_ = grad_offset.data(); + const scalar_t *data_col_ = data_col.data_ptr(); + const scalar_t *data_im_ = data_im.data_ptr(); + const scalar_t *data_offset_ = data_offset.data_ptr(); + scalar_t *grad_offset_ = grad_offset.data_ptr(); deformable_col2im_coord_gpu_kernel<<>>( num_kernels, data_col_, data_im_, data_offset_, channels, height, width, @@ -780,10 +780,10 @@ void modulated_deformable_im2col_cuda( AT_DISPATCH_FLOATING_TYPES_AND_HALF( data_im.scalar_type(), "modulated_deformable_im2col_gpu", ([&] { - const scalar_t *data_im_ = data_im.data(); - const scalar_t *data_offset_ = data_offset.data(); - const scalar_t *data_mask_ = data_mask.data(); - scalar_t *data_col_ = data_col.data(); + const scalar_t *data_im_ = data_im.data_ptr(); + const scalar_t *data_offset_ = data_offset.data_ptr(); + const scalar_t *data_mask_ = data_mask.data_ptr(); + scalar_t *data_col_ = data_col.data_ptr(); modulated_deformable_im2col_gpu_kernel<<>>( num_kernels, data_im_, data_offset_, data_mask_, height_im, width_im, kernel_h, kenerl_w, @@ -812,10 +812,10 @@ void modulated_deformable_col2im_cuda( AT_DISPATCH_FLOATING_TYPES_AND_HALF( data_col.scalar_type(), "modulated_deformable_col2im_gpu", ([&] { - const scalar_t *data_col_ = data_col.data(); - const scalar_t *data_offset_ = data_offset.data(); - const scalar_t *data_mask_ = data_mask.data(); - scalar_t *grad_im_ = grad_im.data(); + const scalar_t *data_col_ = data_col.data_ptr(); + const scalar_t *data_offset_ = data_offset.data_ptr(); + const scalar_t *data_mask_ = data_mask.data_ptr(); + scalar_t *grad_im_ = grad_im.data_ptr(); modulated_deformable_col2im_gpu_kernel<<>>( num_kernels, data_col_, data_offset_, data_mask_, channels, height_im, width_im, @@ -845,12 +845,12 @@ void modulated_deformable_col2im_coord_cuda( AT_DISPATCH_FLOATING_TYPES_AND_HALF( data_col.scalar_type(), "modulated_deformable_col2im_coord_gpu", ([&] { - const scalar_t *data_col_ = data_col.data(); - const scalar_t *data_im_ = data_im.data(); - const scalar_t *data_offset_ = data_offset.data(); - const scalar_t *data_mask_ = data_mask.data(); - scalar_t *grad_offset_ = grad_offset.data(); - scalar_t *grad_mask_ = grad_mask.data(); + const scalar_t *data_col_ = data_col.data_ptr(); + const scalar_t *data_im_ = data_im.data_ptr(); + const scalar_t *data_offset_ = data_offset.data_ptr(); + const scalar_t *data_mask_ = data_mask.data_ptr(); + scalar_t *grad_offset_ = grad_offset.data_ptr(); + scalar_t *grad_mask_ = grad_mask.data_ptr(); modulated_deformable_col2im_coord_gpu_kernel<<>>( num_kernels, data_col_, data_im_, data_offset_, data_mask_, channels, height_im, width_im, diff --git a/mmdet/ops/dcn/src/cuda/deform_pool_cuda.cpp b/mmdet/ops/dcn/src/cuda/deform_pool_cuda.cpp index d7ed3f639ea..3c09f998029 100644 --- a/mmdet/ops/dcn/src/cuda/deform_pool_cuda.cpp +++ b/mmdet/ops/dcn/src/cuda/deform_pool_cuda.cpp @@ -33,7 +33,7 @@ void deform_psroi_pooling_cuda_forward( at::Tensor top_count, const int no_trans, const float 
spatial_scale, const int output_dim, const int group_size, const int pooled_size, const int part_size, const int sample_per_part, const float trans_std) { - AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); + TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); at::DeviceGuard guard(input.device()); const int batch = input.size(0); @@ -59,8 +59,8 @@ void deform_psroi_pooling_cuda_backward( const int no_trans, const float spatial_scale, const int output_dim, const int group_size, const int pooled_size, const int part_size, const int sample_per_part, const float trans_std) { - AT_CHECK(out_grad.is_contiguous(), "out_grad tensor has to be contiguous"); - AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); + TORCH_CHECK(out_grad.is_contiguous(), "out_grad tensor has to be contiguous"); + TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); at::DeviceGuard guard(input.device()); const int batch = input.size(0); diff --git a/mmdet/ops/dcn/src/cuda/deform_pool_cuda_kernel.cu b/mmdet/ops/dcn/src/cuda/deform_pool_cuda_kernel.cu index 05b00d4be61..18e3a048d3f 100644 --- a/mmdet/ops/dcn/src/cuda/deform_pool_cuda_kernel.cu +++ b/mmdet/ops/dcn/src/cuda/deform_pool_cuda_kernel.cu @@ -290,11 +290,11 @@ void DeformablePSROIPoolForward(const at::Tensor data, AT_DISPATCH_FLOATING_TYPES_AND_HALF( data.scalar_type(), "deformable_psroi_pool_forward", ([&] { - const scalar_t *bottom_data = data.data(); - const scalar_t *bottom_rois = bbox.data(); - const scalar_t *bottom_trans = no_trans ? NULL : trans.data(); - scalar_t *top_data = out.data(); - scalar_t *top_count_data = top_count.data(); + const scalar_t *bottom_data = data.data_ptr(); + const scalar_t *bottom_rois = bbox.data_ptr(); + const scalar_t *bottom_trans = no_trans ? NULL : trans.data_ptr(); + scalar_t *top_data = out.data_ptr(); + scalar_t *top_count_data = top_count.data_ptr(); DeformablePSROIPoolForwardKernel<<>>( count, bottom_data, (scalar_t)spatial_scale, channels, height, width, pooled_height, pooled_width, @@ -341,13 +341,13 @@ void DeformablePSROIPoolBackwardAcc(const at::Tensor out_grad, AT_DISPATCH_FLOATING_TYPES_AND_HALF( out_grad.scalar_type(), "deformable_psroi_pool_backward_acc", ([&] { - const scalar_t *top_diff = out_grad.data(); - const scalar_t *bottom_data = data.data(); - const scalar_t *bottom_rois = bbox.data(); - const scalar_t *bottom_trans = no_trans ? NULL : trans.data(); - scalar_t *bottom_data_diff = in_grad.data(); - scalar_t *bottom_trans_diff = no_trans ? NULL : trans_grad.data(); - const scalar_t *top_count_data = top_count.data(); + const scalar_t *top_diff = out_grad.data_ptr(); + const scalar_t *bottom_data = data.data_ptr(); + const scalar_t *bottom_rois = bbox.data_ptr(); + const scalar_t *bottom_trans = no_trans ? NULL : trans.data_ptr(); + scalar_t *bottom_data_diff = in_grad.data_ptr(); + scalar_t *bottom_trans_diff = no_trans ? 
NULL : trans_grad.data_ptr(); + const scalar_t *top_count_data = top_count.data_ptr(); DeformablePSROIPoolBackwardAccKernel<<>>( count, top_diff, top_count_data, num_rois, (scalar_t)spatial_scale, channels, height, width, diff --git a/mmdet/ops/dcn/src/deform_conv_ext.cpp b/mmdet/ops/dcn/src/deform_conv_ext.cpp index 2beaeffcbb0..fac60162b69 100644 --- a/mmdet/ops/dcn/src/deform_conv_ext.cpp +++ b/mmdet/ops/dcn/src/deform_conv_ext.cpp @@ -54,7 +54,7 @@ int deform_conv_forward(at::Tensor input, at::Tensor weight, int kH, int dW, int dH, int padW, int padH, int dilationW, int dilationH, int group, int deformable_group, int im2col_step) { - if (input.type().is_cuda()) { + if (input.device().is_cuda()) { #ifdef WITH_CUDA return deform_conv_forward_cuda(input, weight, offset, output, columns, ones, kW, kH, dW, dH, padW, padH, dilationW, dilationH, group, @@ -73,7 +73,7 @@ int deform_conv_backward_input(at::Tensor input, at::Tensor offset, int dH, int padW, int padH, int dilationW, int dilationH, int group, int deformable_group, int im2col_step) { - if (input.type().is_cuda()) { + if (input.device().is_cuda()) { #ifdef WITH_CUDA return deform_conv_backward_input_cuda(input, offset, gradOutput, gradInput, gradOffset, weight, columns, kW, kH, dW, dH, padW, padH, @@ -91,7 +91,7 @@ int deform_conv_backward_parameters( at::Tensor columns, at::Tensor ones, int kW, int kH, int dW, int dH, int padW, int padH, int dilationW, int dilationH, int group, int deformable_group, float scale, int im2col_step) { - if (input.type().is_cuda()) { + if (input.device().is_cuda()) { #ifdef WITH_CUDA return deform_conv_backward_parameters_cuda(input, offset, gradOutput, gradWeight, columns, ones, kW, kH, dW, dH, padW, padH, dilationW, @@ -110,7 +110,7 @@ void modulated_deform_conv_forward( const int pad_h, const int pad_w, const int dilation_h, const int dilation_w, const int group, const int deformable_group, const bool with_bias) { - if (input.type().is_cuda()) { + if (input.device().is_cuda()) { #ifdef WITH_CUDA return modulated_deform_conv_cuda_forward(input, weight, bias, ones, offset, mask, output, columns, kernel_h, kernel_w, stride_h, @@ -131,7 +131,7 @@ void modulated_deform_conv_backward( int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h, int pad_w, int dilation_h, int dilation_w, int group, int deformable_group, const bool with_bias) { - if (input.type().is_cuda()) { + if (input.device().is_cuda()) { #ifdef WITH_CUDA return modulated_deform_conv_cuda_backward(input, weight, bias, ones, offset, mask, columns, grad_input, grad_weight, grad_bias, grad_offset, diff --git a/mmdet/ops/dcn/src/deform_pool_ext.cpp b/mmdet/ops/dcn/src/deform_pool_ext.cpp index f590fabec5f..877064828d5 100644 --- a/mmdet/ops/dcn/src/deform_pool_ext.cpp +++ b/mmdet/ops/dcn/src/deform_pool_ext.cpp @@ -31,7 +31,7 @@ void deform_psroi_pooling_forward( at::Tensor top_count, const int no_trans, const float spatial_scale, const int output_dim, const int group_size, const int pooled_size, const int part_size, const int sample_per_part, const float trans_std) { - if (input.type().is_cuda()) { + if (input.device().is_cuda()) { #ifdef WITH_CUDA return deform_psroi_pooling_cuda_forward(input, bbox, trans, out, top_count, no_trans, spatial_scale, output_dim, group_size, pooled_size, @@ -49,7 +49,7 @@ void deform_psroi_pooling_backward( const int no_trans, const float spatial_scale, const int output_dim, const int group_size, const int pooled_size, const int part_size, const int sample_per_part, const float trans_std) { - if 
(input.type().is_cuda()) { + if (input.device().is_cuda()) { #ifdef WITH_CUDA return deform_psroi_pooling_cuda_backward(out_grad, input, bbox, trans, top_count, input_grad, trans_grad, no_trans, spatial_scale, diff --git a/mmdet/ops/grid_sampler/__init__.py b/mmdet/ops/grid_sampler/__init__.py deleted file mode 100644 index 868617a6b3f..00000000000 --- a/mmdet/ops/grid_sampler/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .grid_sampler import grid_sample - -__all__ = ['grid_sample'] diff --git a/mmdet/ops/grid_sampler/grid_sampler.py b/mmdet/ops/grid_sampler/grid_sampler.py deleted file mode 100644 index b5c59aa4906..00000000000 --- a/mmdet/ops/grid_sampler/grid_sampler.py +++ /dev/null @@ -1,100 +0,0 @@ -import torch -import torch.nn.functional as F -from torch.autograd import Function -from torch.autograd.function import once_differentiable - -from . import grid_sampler_ext - - -class _GridSampler(Function): - - @staticmethod - def forward(ctx, input, grid, mode_enum, padding_mode_enum, align_corners): - - ctx.save_for_backward(input, grid) - ctx.mode_enum = mode_enum - ctx.padding_mode_enum = padding_mode_enum - ctx.align_corners = align_corners - - output = grid_sampler_ext.grid_sampler_forward(input, grid, mode_enum, - padding_mode_enum, - align_corners) - - return output - - @staticmethod - @once_differentiable - def backward(ctx, grad_output): - input, grid = ctx.saved_tensors - mode_enum = ctx.mode_enum - padding_mode_enum = ctx.padding_mode_enum - align_corners = ctx.align_corners - - grad_input, grad_grid = grid_sampler_ext.grid_sampler_backward( - grad_output, input, grid, mode_enum, padding_mode_enum, - align_corners) - - return grad_input, grad_grid, None, None, None - - -def grid_sample(input, - grid, - mode='bilinear', - padding_mode='zeros', - align_corners=False): - if torch.__version__ >= '1.3': - return F.grid_sample(input, grid, mode, padding_mode, align_corners) - elif align_corners: - return F.grid_sample(input, grid, mode, padding_mode) - else: - - # use self-compiled grid_sampler to support align_corners=False - - assert mode in ['bilinear', 'nearest'], \ - 'expected mode to be bilinear or nearest, but got: {}'.format(mode) - - assert padding_mode in ['zeros', 'border', 'reflection'], \ - 'expected padding_mode to be zeros, border, or reflection, ' \ - 'but got: {}'.format(padding_mode) - - if mode == 'bilinear': - mode_enum = 0 - else: - mode_enum = 1 - - if padding_mode == 'zeros': - padding_mode_enum = 0 - elif padding_mode == 'border': - padding_mode_enum = 1 - else: - padding_mode_enum = 2 - - # shape check - assert input.device == grid.device, \ - 'expected input and grid to be on same device, ' \ - 'but input is on {} and grid is on {}'.format( - input.device, grid.device) - assert input.dtype == grid.dtype, \ - 'expected input and grid to have the same dtype, ' \ - 'but input has {} and grid has {}'.format( - input.dtype, grid.dtype) - assert input.dim() == 4 or input.dim() == 5, \ - 'expected 4D or 5D input and grid with same number of dimensions' \ - 'but got input with sizes {} and grid with sizes {}'.format( - input.size(), grid.size()) - assert input.size(0) == grid.size(0), \ - 'expected input and grid to have the same batch size, ' \ - 'but got input with sizes {} and grid with sizes {}'.format( - input.size(), grid.size()) - assert grid.size(-1) == input.dim() - 2, \ - 'expected grid to have size {} in last {} dimension, ' \ - 'but got grid with sizes '.format( - input.dim() - 2, grid.size()) - for i in range(2, input.dim()): - assert input.size(i) 
> 0, \ - 'expected input to have non-empty spatial dimensions, ' \ - 'but input has sizes {} with dimension {} being empty'.format( - input.sizes(), i) - - return _GridSampler.apply(input, grid, mode_enum, padding_mode_enum, - align_corners) diff --git a/mmdet/ops/grid_sampler/src/cpu/grid_sampler_cpu.cpp b/mmdet/ops/grid_sampler/src/cpu/grid_sampler_cpu.cpp deleted file mode 100644 index cf1776ed1d7..00000000000 --- a/mmdet/ops/grid_sampler/src/cpu/grid_sampler_cpu.cpp +++ /dev/null @@ -1,692 +0,0 @@ -// Modified from https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/native/GridSampler.cpp - -#include -#include "grid_sampler_cpu.h" -#include -#include -#include -#include -#include - -#ifdef _OPENMP -#include -#endif - -namespace mmdetection { - -using namespace at; -using mmdetection::detail::GridSamplerInterpolation; -using mmdetection::detail::GridSamplerPadding; - -namespace { - - template - Tensor grid_sampler_2d_forward_cpu_impl(const Tensor& input, const Tensor& grid, - GridSamplerInterpolation interpolation_mode, - GridSamplerPadding padding_mode, - bool align_corners) { - int64_t N = input.size(0); - int64_t C = input.size(1); - int64_t inp_H = input.size(2); - int64_t inp_W = input.size(3); - int64_t out_H = grid.size(1); - int64_t out_W = grid.size(2); - auto output = at::empty({N, C, out_H, out_W}, input.options()); - int64_t inp_sN = input.stride(0); - int64_t inp_sC = input.stride(1); - int64_t inp_sH = input.stride(2); - int64_t inp_sW = input.stride(3); - int64_t grid_sN = grid.stride(0); - int64_t grid_sH = grid.stride(1); - int64_t grid_sW = grid.stride(2); - int64_t grid_sCoor = grid.stride(3); - int64_t out_sN = output.stride(0); - int64_t out_sC = output.stride(1); - int64_t out_sH = output.stride(2); - int64_t out_sW = output.stride(3); - scalar_t *inp_ptr = input.data(); - scalar_t *out_ptr = output.data(); - scalar_t *grid_ptr = grid.data(); - // loop over each output pixel - #ifdef _OPENMP - #pragma omp parallel for - #endif - for (int64_t n = 0; n < N; ++n) { - scalar_t *grid_ptr_N = grid_ptr + n * grid_sN; - scalar_t *inp_ptr_N = inp_ptr + n * inp_sN; - for (int64_t h = 0; h < out_H; ++h) { - for (int64_t w = 0; w < out_W; ++w) { - // get the corresponding input x, y, z co-ordinates from grid - scalar_t *grid_ptr_NHW = grid_ptr_N + h * grid_sH + w * grid_sW; - scalar_t ix = *grid_ptr_NHW; - scalar_t iy = grid_ptr_NHW[grid_sCoor]; - - ix = grid_sampler_compute_source_index(ix, inp_W, padding_mode, align_corners); - iy = grid_sampler_compute_source_index(iy, inp_H, padding_mode, align_corners); - - if (interpolation_mode == GridSamplerInterpolation::Bilinear) { - // get corner pixel values from (x, y, z) - // for 4d, we used north-east-south-west - // for 5d, we add top-bottom - int64_t ix_nw = static_cast(std::floor(ix)); - int64_t iy_nw = static_cast(std::floor(iy)); - - int64_t ix_ne = ix_nw + 1; - int64_t iy_ne = iy_nw; - - int64_t ix_sw = ix_nw; - int64_t iy_sw = iy_nw + 1; - - int64_t ix_se = ix_nw + 1; - int64_t iy_se = iy_nw + 1; - - // get surfaces to each neighbor: - scalar_t nw = (ix_se - ix) * (iy_se - iy) ; - scalar_t ne = (ix - ix_sw) * (iy_sw - iy) ; - scalar_t sw = (ix_ne - ix) * (iy - iy_ne); - scalar_t se = (ix - ix_nw) * (iy - iy_nw); - - // calculate bilinear weighted pixel value and set output pixel - scalar_t *out_ptr_NCHW = out_ptr + n * out_sN + h * out_sH + w * out_sW; - scalar_t *inp_ptr_NC = inp_ptr_N; - for (int c = 0; c < C; ++c, out_ptr_NCHW += out_sC, inp_ptr_NC += inp_sC) { - // (c, iz_tnw, iy_tnw, ix_tnw) * tnw + (c, 
iz_tne, iy_tne, ix_tne) * tne - // + (c, iz_tsw, iy_tsw, ix_tsw) * tsw + (c, iz_tse, iy_tse, ix_tse) * tse - // + (c, iz_bnw, iy_bnw, ix_bnw) * bnw + (c, iz_bne, iy_bne, ix_bne) * bne - // + (c, iz_bsw, iy_bsw, ix_bsw) * bsw + (c, iz_bse, iy_bse, ix_bse) * bse - *out_ptr_NCHW = static_cast(0); - if (within_bounds_2d(iy_nw, ix_nw, inp_H, inp_W)) { - *out_ptr_NCHW += inp_ptr_NC[iy_nw * inp_sH + ix_nw * inp_sW] * nw; - } - if (within_bounds_2d(iy_ne, ix_ne, inp_H, inp_W)) { - *out_ptr_NCHW += inp_ptr_NC[iy_ne * inp_sH + ix_ne * inp_sW] * ne; - } - if (within_bounds_2d(iy_sw, ix_sw, inp_H, inp_W)) { - *out_ptr_NCHW += inp_ptr_NC[iy_sw * inp_sH + ix_sw * inp_sW] * sw; - } - if (within_bounds_2d(iy_se, ix_se, inp_H, inp_W)) { - *out_ptr_NCHW += inp_ptr_NC[iy_se * inp_sH + ix_se * inp_sW] * se; - } - } - } else if (interpolation_mode == GridSamplerInterpolation::Nearest) { - int64_t ix_nearest = static_cast(std::round(ix)); - int64_t iy_nearest = static_cast(std::round(iy)); - - // assign nearest neighor pixel value to output pixel - scalar_t *out_ptr_NCHW = out_ptr + n * out_sN + h * out_sH + w * out_sW; - scalar_t *inp_ptr_NC = inp_ptr_N; - for (int c = 0; c < C; ++c, out_ptr_NCHW += out_sC, inp_ptr_NC += inp_sC) { - if (within_bounds_2d(iy_nearest, ix_nearest, inp_H, inp_W)) { - *out_ptr_NCHW = inp_ptr_NC[iy_nearest * inp_sH + ix_nearest * inp_sW]; - } else { - *out_ptr_NCHW = static_cast(0); - } - } - } - } - } - } - - return output; - } - - template - Tensor grid_sampler_3d_forward_cpu_impl(const Tensor& input, const Tensor& grid, - GridSamplerInterpolation interpolation_mode, - GridSamplerPadding padding_mode, - bool align_corners) { - int64_t N = input.size(0); - int64_t C = input.size(1); - int64_t inp_D = input.size(2); - int64_t inp_H = input.size(3); - int64_t inp_W = input.size(4); - int64_t out_D = grid.size(1); - int64_t out_H = grid.size(2); - int64_t out_W = grid.size(3); - auto output = at::empty({N, C, out_D, out_H, out_W}, input.options()); - int64_t inp_sN = input.stride(0); - int64_t inp_sC = input.stride(1); - int64_t inp_sD = input.stride(2); - int64_t inp_sH = input.stride(3); - int64_t inp_sW = input.stride(4); - int64_t grid_sN = grid.stride(0); - int64_t grid_sD = grid.stride(1); - int64_t grid_sH = grid.stride(2); - int64_t grid_sW = grid.stride(3); - int64_t grid_sCoor = grid.stride(4); - int64_t out_sN = output.stride(0); - int64_t out_sC = output.stride(1); - int64_t out_sD = output.stride(2); - int64_t out_sH = output.stride(3); - int64_t out_sW = output.stride(4); - scalar_t *inp_ptr = input.data(); - scalar_t *out_ptr = output.data(); - scalar_t *grid_ptr = grid.data(); - // loop over each output pixel - #ifdef _OPENMP - #pragma omp parallel for - #endif - for (int64_t n = 0; n < N; ++n) { - scalar_t *grid_ptr_N = grid_ptr + n * grid_sN; - scalar_t *inp_ptr_N = inp_ptr + n * inp_sN; - for (int64_t d = 0; d < out_D; ++d) { - for (int64_t h = 0; h < out_H; ++h) { - for (int64_t w = 0; w < out_W; ++w) { - // get the corresponding input x, y, z co-ordinates from grid - scalar_t *grid_ptr_NDHW = grid_ptr_N + d * grid_sD + h * grid_sH + w * grid_sW; - scalar_t ix = *grid_ptr_NDHW; - scalar_t iy = grid_ptr_NDHW[grid_sCoor]; - scalar_t iz = grid_ptr_NDHW[2 * grid_sCoor]; - - ix = grid_sampler_compute_source_index(ix, inp_W, padding_mode, align_corners); - iy = grid_sampler_compute_source_index(iy, inp_H, padding_mode, align_corners); - iz = grid_sampler_compute_source_index(iz, inp_D, padding_mode, align_corners); - - if (interpolation_mode == 
GridSamplerInterpolation::Bilinear) { - // get corner pixel values from (x, y, z) - // for 4d, we used north-east-south-west - // for 5d, we add top-bottom - int64_t ix_tnw = static_cast(std::floor(ix)); - int64_t iy_tnw = static_cast(std::floor(iy)); - int64_t iz_tnw = static_cast(std::floor(iz)); - - int64_t ix_tne = ix_tnw + 1; - int64_t iy_tne = iy_tnw; - int64_t iz_tne = iz_tnw; - - int64_t ix_tsw = ix_tnw; - int64_t iy_tsw = iy_tnw + 1; - int64_t iz_tsw = iz_tnw; - - int64_t ix_tse = ix_tnw + 1; - int64_t iy_tse = iy_tnw + 1; - int64_t iz_tse = iz_tnw; - - int64_t ix_bnw = ix_tnw; - int64_t iy_bnw = iy_tnw; - int64_t iz_bnw = iz_tnw + 1; - - int64_t ix_bne = ix_tnw + 1; - int64_t iy_bne = iy_tnw; - int64_t iz_bne = iz_tnw + 1; - - int64_t ix_bsw = ix_tnw; - int64_t iy_bsw = iy_tnw + 1; - int64_t iz_bsw = iz_tnw + 1; - - int64_t ix_bse = ix_tnw + 1; - int64_t iy_bse = iy_tnw + 1; - int64_t iz_bse = iz_tnw + 1; - - // get surfaces to each neighbor: - scalar_t tnw = (ix_bse - ix) * (iy_bse - iy) * (iz_bse - iz); - scalar_t tne = (ix - ix_bsw) * (iy_bsw - iy) * (iz_bsw - iz); - scalar_t tsw = (ix_bne - ix) * (iy - iy_bne) * (iz_bne - iz); - scalar_t tse = (ix - ix_bnw) * (iy - iy_bnw) * (iz_bnw - iz); - scalar_t bnw = (ix_tse - ix) * (iy_tse - iy) * (iz - iz_tse); - scalar_t bne = (ix - ix_tsw) * (iy_tsw - iy) * (iz - iz_tsw); - scalar_t bsw = (ix_tne - ix) * (iy - iy_tne) * (iz - iz_tne); - scalar_t bse = (ix - ix_tnw) * (iy - iy_tnw) * (iz - iz_tnw); - - // calculate bilinear weighted pixel value and set output pixel - scalar_t *out_ptr_NCDHW = out_ptr + n * out_sN + d * out_sD + h * out_sH + w * out_sW; - scalar_t *inp_ptr_NC = inp_ptr_N; - for (int c = 0; c < C; ++c, out_ptr_NCDHW += out_sC, inp_ptr_NC += inp_sC) { - // (c, iz_tnw, iy_tnw, ix_tnw) * tnw + (c, iz_tne, iy_tne, ix_tne) * tne - // + (c, iz_tsw, iy_tsw, ix_tsw) * tsw + (c, iz_tse, iy_tse, ix_tse) * tse - // + (c, iz_bnw, iy_bnw, ix_bnw) * bnw + (c, iz_bne, iy_bne, ix_bne) * bne - // + (c, iz_bsw, iy_bsw, ix_bsw) * bsw + (c, iz_bse, iy_bse, ix_bse) * bse - *out_ptr_NCDHW = static_cast(0); - if (within_bounds_3d(iz_tnw, iy_tnw, ix_tnw, inp_D, inp_H, inp_W)) { - *out_ptr_NCDHW += inp_ptr_NC[iz_tnw * inp_sD + iy_tnw * inp_sH + ix_tnw * inp_sW] * tnw; - } - if (within_bounds_3d(iz_tne, iy_tne, ix_tne, inp_D, inp_H, inp_W)) { - *out_ptr_NCDHW += inp_ptr_NC[iz_tne * inp_sD + iy_tne * inp_sH + ix_tne * inp_sW] * tne; - } - if (within_bounds_3d(iz_tsw, iy_tsw, ix_tsw, inp_D, inp_H, inp_W)) { - *out_ptr_NCDHW += inp_ptr_NC[iz_tsw * inp_sD + iy_tsw * inp_sH + ix_tsw * inp_sW] * tsw; - } - if (within_bounds_3d(iz_tse, iy_tse, ix_tse, inp_D, inp_H, inp_W)) { - *out_ptr_NCDHW += inp_ptr_NC[iz_tse * inp_sD + iy_tse * inp_sH + ix_tse * inp_sW] * tse; - } - if (within_bounds_3d(iz_bnw, iy_bnw, ix_bnw, inp_D, inp_H, inp_W)) { - *out_ptr_NCDHW += inp_ptr_NC[iz_bnw * inp_sD + iy_bnw * inp_sH + ix_bnw * inp_sW] * bnw; - } - if (within_bounds_3d(iz_bne, iy_bne, ix_bne, inp_D, inp_H, inp_W)) { - *out_ptr_NCDHW += inp_ptr_NC[iz_bne * inp_sD + iy_bne * inp_sH + ix_bne * inp_sW] * bne; - } - if (within_bounds_3d(iz_bsw, iy_bsw, ix_bsw, inp_D, inp_H, inp_W)) { - *out_ptr_NCDHW += inp_ptr_NC[iz_bsw * inp_sD + iy_bsw * inp_sH + ix_bsw * inp_sW] * bsw; - } - if (within_bounds_3d(iz_bse, iy_bse, ix_bse, inp_D, inp_H, inp_W)) { - *out_ptr_NCDHW += inp_ptr_NC[iz_bse * inp_sD + iy_bse * inp_sH + ix_bse * inp_sW] * bse; - } - } - } else if (interpolation_mode == GridSamplerInterpolation::Nearest) { - int64_t ix_nearest = static_cast(std::round(ix)); - 
int64_t iy_nearest = static_cast(std::round(iy)); - int64_t iz_nearest = static_cast(std::round(iz)); - - // assign nearest neighor pixel value to output pixel - scalar_t *out_ptr_NCDHW = out_ptr + n * out_sN + d * out_sD + h * out_sH + w * out_sW; - scalar_t *inp_ptr_NC = inp_ptr_N; - for (int c = 0; c < C; ++c, out_ptr_NCDHW += out_sC, inp_ptr_NC += inp_sC) { - if (within_bounds_3d(iz_nearest, iy_nearest, ix_nearest, inp_D, inp_H, inp_W)) { - *out_ptr_NCDHW = inp_ptr_NC[iz_nearest * inp_sD + iy_nearest * inp_sH + ix_nearest * inp_sW]; - } else { - *out_ptr_NCDHW = static_cast(0); - } - } - } - } - } - } - } - return output; - } - - template - std::tuple - grid_sampler_2d_backward_cpu_impl(const Tensor& grad_output, - const Tensor& input, const Tensor& grid, - GridSamplerInterpolation interpolation_mode, - GridSamplerPadding padding_mode, - bool align_corners) { - auto grad_input = at::zeros_like(input); - auto grad_grid = at::empty_like(grid); - // If interpolation mode is Nearest, then grad_grid is not filled in the - // loop below. - if (interpolation_mode == GridSamplerInterpolation::Nearest) { - grad_grid.zero_(); - } - int64_t N = input.size(0); - int64_t C = input.size(1); - int64_t inp_H = input.size(2); - int64_t inp_W = input.size(3); - int64_t out_H = grid.size(1); - int64_t out_W = grid.size(2); - int64_t inp_sN = input.stride(0); - int64_t inp_sC = input.stride(1); - int64_t inp_sH = input.stride(2); - int64_t inp_sW = input.stride(3); - int64_t grid_sN = grid.stride(0); - int64_t grid_sH = grid.stride(1); - int64_t grid_sW = grid.stride(2); - int64_t grid_sCoor = grid.stride(3); - int64_t gOut_sN = grad_output.stride(0); - int64_t gOut_sC = grad_output.stride(1); - int64_t gOut_sH = grad_output.stride(2); - int64_t gOut_sW = grad_output.stride(3); - int64_t gInp_sN = grad_input.stride(0); - int64_t gInp_sC = grad_input.stride(1); - int64_t gInp_sH = grad_input.stride(2); - int64_t gInp_sW = grad_input.stride(3); - int64_t gGrid_sN = grad_grid.stride(0); - int64_t gGrid_sW = grad_grid.stride(2); - scalar_t *inp_ptr = input.data(); - scalar_t *grid_ptr = grid.data(); - scalar_t *gOut_ptr = grad_output.data(); - scalar_t *gInp_ptr = grad_input.data(); - scalar_t *gGrid_ptr = grad_grid.data(); - // loop over each output pixel - #ifdef _OPENMP - #pragma omp parallel for - #endif - for (int64_t n = 0; n < N; ++n) { - scalar_t *grid_ptr_N = grid_ptr + n * grid_sN; - scalar_t *inp_ptr_N = inp_ptr + n * inp_sN; - scalar_t *gGrid_ptr_NHW = gGrid_ptr + n * gGrid_sN; - for (int64_t h = 0; h < out_H; ++h) { - for (int64_t w = 0; w < out_W; ++w, gGrid_ptr_NHW += gGrid_sW /* grad_grid is contiguous */ ) { - // get the corresponding input x, y, z co-ordinates from grid - scalar_t *grid_ptr_NHW = grid_ptr_N + h * grid_sH + w * grid_sW; - scalar_t ix = *grid_ptr_NHW; - scalar_t iy = grid_ptr_NHW[grid_sCoor]; - - // multipliers for gradients on ix, iy, and iz - scalar_t gix_mult, giy_mult; - ix = grid_sampler_compute_source_index_set_grad(ix, inp_W, padding_mode, align_corners, &gix_mult); - iy = grid_sampler_compute_source_index_set_grad(iy, inp_H, padding_mode, align_corners, &giy_mult); - - if (interpolation_mode == GridSamplerInterpolation::Bilinear) { - // get corner pixel values from (x, y, z) - // for 4d, we used north-east-south-west - // for 5d, we add top-bottom - int64_t ix_nw = static_cast(std::floor(ix)); - int64_t iy_nw = static_cast(std::floor(iy)); - - int64_t ix_ne = ix_nw + 1; - int64_t iy_ne = iy_nw; - - int64_t ix_sw = ix_nw; - int64_t iy_sw = iy_nw + 1; - - int64_t ix_se = 
ix_nw + 1; - int64_t iy_se = iy_nw + 1; - - // get surfaces to each neighbor: - scalar_t nw = (ix_se - ix) * (iy_se - iy) ; - scalar_t ne = (ix - ix_sw) * (iy_sw - iy) ; - scalar_t sw = (ix_ne - ix) * (iy - iy_ne); - scalar_t se = (ix - ix_nw) * (iy - iy_nw); - - scalar_t gix = static_cast(0), giy = static_cast(0); - scalar_t *gOut_ptr_NCHW = gOut_ptr + n * gOut_sN + h * gOut_sH + w * gOut_sW; - scalar_t *gInp_ptr_NC = gInp_ptr + n * gInp_sN; - scalar_t *inp_ptr_NC = inp_ptr_N; - // calculate bilinear weighted pixel value and set output pixel - for (int c = 0; c < C; ++c, gOut_ptr_NCHW += gOut_sC, gInp_ptr_NC += gInp_sC, inp_ptr_NC += inp_sC) { - scalar_t gOut = *gOut_ptr_NCHW; - - // calculate and set grad_input - safe_add_2d(gInp_ptr_NC, iy_nw, ix_nw, gInp_sH, gInp_sW, inp_H, inp_W, nw * gOut); - safe_add_2d(gInp_ptr_NC, iy_ne, ix_ne, gInp_sH, gInp_sW, inp_H, inp_W, ne * gOut); - safe_add_2d(gInp_ptr_NC, iy_sw, ix_sw, gInp_sH, gInp_sW, inp_H, inp_W, sw * gOut); - safe_add_2d(gInp_ptr_NC, iy_se, ix_se, gInp_sH, gInp_sW, inp_H, inp_W, se * gOut); - - // calculate grad_grid - if (within_bounds_2d(iy_nw, ix_nw, inp_H, inp_W)) { - scalar_t nw_val = inp_ptr_NC[iy_nw * inp_sH + ix_nw * inp_sW]; - gix -= nw_val * (iy_se - iy) * gOut; - giy -= nw_val * (ix_se - ix) * gOut; - } - if (within_bounds_2d(iy_ne, ix_ne, inp_H, inp_W)) { - scalar_t ne_val = inp_ptr_NC[iy_ne * inp_sH + ix_ne * inp_sW]; - gix += ne_val * (iy_sw - iy) * gOut; - giy -= ne_val * (ix - ix_sw) * gOut; - } - if (within_bounds_2d(iy_sw, ix_sw, inp_H, inp_W)) { - scalar_t sw_val = inp_ptr_NC[iy_sw * inp_sH + ix_sw * inp_sW]; - gix -= sw_val * (iy - iy_ne) * gOut; - giy += sw_val * (ix_ne - ix) * gOut; - } - if (within_bounds_2d(iy_se, ix_se, inp_H, inp_W)) { - scalar_t se_val = inp_ptr_NC[iy_se * inp_sH + ix_se * inp_sW]; - gix += se_val * (iy - iy_nw) * gOut; - giy += se_val * (ix - ix_nw) * gOut; - } - } - - // assuming grad_grid is contiguous - gGrid_ptr_NHW[0] = gix_mult * gix; - gGrid_ptr_NHW[1] = giy_mult * giy; - } else if (interpolation_mode == GridSamplerInterpolation::Nearest) { - int64_t ix_nearest = static_cast(std::round(ix)); - int64_t iy_nearest = static_cast(std::round(iy)); - - // assign nearest neighor pixel value to output pixel - scalar_t *gOut_ptr_NCHW = gOut_ptr + n * gOut_sN + h * gOut_sH + w * gOut_sW; - scalar_t *gInp_ptr_NC = gInp_ptr + n * gInp_sN; - for (int c = 0; c < C; ++c, gOut_ptr_NCHW += gOut_sC, gInp_ptr_NC += gInp_sC) { - // calculate and set grad_input - safe_add_2d(gInp_ptr_NC, iy_nearest, ix_nearest, - gInp_sH, gInp_sW, inp_H, inp_W, *gOut_ptr_NCHW); - } - } - } - } - } - return std::make_tuple(grad_input, grad_grid); - } - - template - std::tuple - grid_sampler_3d_backward_cpu_impl(const Tensor& grad_output, - const Tensor& input, const Tensor& grid, - GridSamplerInterpolation interpolation_mode, - GridSamplerPadding padding_mode, - bool align_corners) { - auto grad_input = at::zeros_like(input); - auto grad_grid = at::empty_like(grid); - // If interpolation mode is Nearest, then grad_grid is not filled in the - // loop below. 
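For reference, the grid_sampler and affine_grid wrappers being deleted in this patch exist only to emulate align_corners=False on PyTorch < 1.3; on the torch 1.3.1 / 1.5.0 builds now exercised by CI they delegate straight to the native ops, which is why this CPU fallback can be removed. A minimal usage sketch of that native path, using hypothetical tensors and an identity transform (not code from this repository):

import torch
import torch.nn.functional as F

# Hypothetical example: one 3-channel 8x8 feature map and an identity 2D affine matrix.
inp = torch.randn(1, 3, 8, 8)
theta = torch.tensor([[[1., 0., 0.],
                       [0., 1., 0.]]])

# Native calls (torch >= 1.3) that the removed mmdet.ops wrappers fall back to.
grid = F.affine_grid(theta, size=(1, 3, 8, 8), align_corners=False)
out = F.grid_sample(inp, grid, mode='bilinear', padding_mode='zeros',
                    align_corners=False)
assert out.shape == (1, 3, 8, 8)  # identity transform preserves the spatial layout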
- if (interpolation_mode == GridSamplerInterpolation::Nearest) { - grad_grid.zero_(); - } - int64_t N = input.size(0); - int64_t C = input.size(1); - int64_t inp_D = input.size(2); - int64_t inp_H = input.size(3); - int64_t inp_W = input.size(4); - int64_t out_D = grid.size(1); - int64_t out_H = grid.size(2); - int64_t out_W = grid.size(3); - int64_t inp_sN = input.stride(0); - int64_t inp_sC = input.stride(1); - int64_t inp_sD = input.stride(2); - int64_t inp_sH = input.stride(3); - int64_t inp_sW = input.stride(4); - int64_t grid_sN = grid.stride(0); - int64_t grid_sD = grid.stride(1); - int64_t grid_sH = grid.stride(2); - int64_t grid_sW = grid.stride(3); - int64_t grid_sCoor = grid.stride(4); - int64_t gOut_sN = grad_output.stride(0); - int64_t gOut_sC = grad_output.stride(1); - int64_t gOut_sD = grad_output.stride(2); - int64_t gOut_sH = grad_output.stride(3); - int64_t gOut_sW = grad_output.stride(4); - int64_t gInp_sN = grad_input.stride(0); - int64_t gInp_sC = grad_input.stride(1); - int64_t gInp_sD = grad_input.stride(2); - int64_t gInp_sH = grad_input.stride(3); - int64_t gInp_sW = grad_input.stride(4); - int64_t gGrid_sN = grad_grid.stride(0); - int64_t gGrid_sW = grad_grid.stride(3); - scalar_t *inp_ptr = input.data(); - scalar_t *grid_ptr = grid.data(); - scalar_t *gOut_ptr = grad_output.data(); - scalar_t *gInp_ptr = grad_input.data(); - scalar_t *gGrid_ptr = grad_grid.data(); - // loop over each output pixel - #ifdef _OPENMP - #pragma omp parallel for - #endif - for (int64_t n = 0; n < N; ++n) { - scalar_t *grid_ptr_N = grid_ptr + n * grid_sN; - scalar_t *inp_ptr_N = inp_ptr + n * inp_sN; - scalar_t *gGrid_ptr_NDHW = gGrid_ptr + n * gGrid_sN; - for (int64_t d = 0; d < out_D; ++d) { - for (int64_t h = 0; h < out_H; ++h) { - for (int64_t w = 0; w < out_W; ++w, gGrid_ptr_NDHW += gGrid_sW /* grad_grid is contiguous */ ) { - // get the corresponding input x, y, z co-ordinates from grid - scalar_t *grid_ptr_NDHW = grid_ptr_N + d * grid_sD + h * grid_sH + w * grid_sW; - scalar_t ix = *grid_ptr_NDHW; - scalar_t iy = grid_ptr_NDHW[grid_sCoor]; - scalar_t iz = grid_ptr_NDHW[2 * grid_sCoor]; - - // multipliers for gradients on ix, iy, and iz - scalar_t gix_mult, giy_mult, giz_mult; - ix = grid_sampler_compute_source_index_set_grad(ix, inp_W, padding_mode, align_corners, &gix_mult); - iy = grid_sampler_compute_source_index_set_grad(iy, inp_H, padding_mode, align_corners, &giy_mult); - iz = grid_sampler_compute_source_index_set_grad(iz, inp_D, padding_mode, align_corners, &giz_mult); - - if (interpolation_mode == GridSamplerInterpolation::Bilinear) { - // get corner pixel values from (x, y, z) - // for 4d, we used north-east-south-west - // for 5d, we add top-bottom - int64_t ix_tnw = static_cast(std::floor(ix)); - int64_t iy_tnw = static_cast(std::floor(iy)); - int64_t iz_tnw = static_cast(std::floor(iz)); - - int64_t ix_tne = ix_tnw + 1; - int64_t iy_tne = iy_tnw; - int64_t iz_tne = iz_tnw; - - int64_t ix_tsw = ix_tnw; - int64_t iy_tsw = iy_tnw + 1; - int64_t iz_tsw = iz_tnw; - - int64_t ix_tse = ix_tnw + 1; - int64_t iy_tse = iy_tnw + 1; - int64_t iz_tse = iz_tnw; - - int64_t ix_bnw = ix_tnw; - int64_t iy_bnw = iy_tnw; - int64_t iz_bnw = iz_tnw + 1; - - int64_t ix_bne = ix_tnw + 1; - int64_t iy_bne = iy_tnw; - int64_t iz_bne = iz_tnw + 1; - - int64_t ix_bsw = ix_tnw; - int64_t iy_bsw = iy_tnw + 1; - int64_t iz_bsw = iz_tnw + 1; - - int64_t ix_bse = ix_tnw + 1; - int64_t iy_bse = iy_tnw + 1; - int64_t iz_bse = iz_tnw + 1; - - // get surfaces to each neighbor: - scalar_t tnw = (ix_bse - 
ix) * (iy_bse - iy) * (iz_bse - iz); - scalar_t tne = (ix - ix_bsw) * (iy_bsw - iy) * (iz_bsw - iz); - scalar_t tsw = (ix_bne - ix) * (iy - iy_bne) * (iz_bne - iz); - scalar_t tse = (ix - ix_bnw) * (iy - iy_bnw) * (iz_bnw - iz); - scalar_t bnw = (ix_tse - ix) * (iy_tse - iy) * (iz - iz_tse); - scalar_t bne = (ix - ix_tsw) * (iy_tsw - iy) * (iz - iz_tsw); - scalar_t bsw = (ix_tne - ix) * (iy - iy_tne) * (iz - iz_tne); - scalar_t bse = (ix - ix_tnw) * (iy - iy_tnw) * (iz - iz_tnw); - - scalar_t gix = static_cast(0), giy = static_cast(0), giz = static_cast(0); - scalar_t *gOut_ptr_NCDHW = gOut_ptr + n * gOut_sN + d * gOut_sD + h * gOut_sH + w * gOut_sW; - scalar_t *gInp_ptr_NC = gInp_ptr + n * gInp_sN; - scalar_t *inp_ptr_NC = inp_ptr_N; - // calculate bilinear weighted pixel value and set output pixel - for (int c = 0; c < C; ++c, gOut_ptr_NCDHW += gOut_sC, gInp_ptr_NC += gInp_sC, inp_ptr_NC += inp_sC) { - scalar_t gOut = *gOut_ptr_NCDHW; - - // calculate and set grad_input - safe_add_3d(gInp_ptr_NC, iz_tnw, iy_tnw, ix_tnw, gInp_sD, gInp_sH, gInp_sW, inp_D, inp_H, inp_W, tnw * gOut); - safe_add_3d(gInp_ptr_NC, iz_tne, iy_tne, ix_tne, gInp_sD, gInp_sH, gInp_sW, inp_D, inp_H, inp_W, tne * gOut); - safe_add_3d(gInp_ptr_NC, iz_tsw, iy_tsw, ix_tsw, gInp_sD, gInp_sH, gInp_sW, inp_D, inp_H, inp_W, tsw * gOut); - safe_add_3d(gInp_ptr_NC, iz_tse, iy_tse, ix_tse, gInp_sD, gInp_sH, gInp_sW, inp_D, inp_H, inp_W, tse * gOut); - safe_add_3d(gInp_ptr_NC, iz_bnw, iy_bnw, ix_bnw, gInp_sD, gInp_sH, gInp_sW, inp_D, inp_H, inp_W, bnw * gOut); - safe_add_3d(gInp_ptr_NC, iz_bne, iy_bne, ix_bne, gInp_sD, gInp_sH, gInp_sW, inp_D, inp_H, inp_W, bne * gOut); - safe_add_3d(gInp_ptr_NC, iz_bsw, iy_bsw, ix_bsw, gInp_sD, gInp_sH, gInp_sW, inp_D, inp_H, inp_W, bsw * gOut); - safe_add_3d(gInp_ptr_NC, iz_bse, iy_bse, ix_bse, gInp_sD, gInp_sH, gInp_sW, inp_D, inp_H, inp_W, bse * gOut); - - // calculate grad_grid - if (within_bounds_3d(iz_tnw, iy_tnw, ix_tnw, inp_D, inp_H, inp_W)) { - scalar_t tnw_val = inp_ptr_NC[iz_tnw * inp_sD + iy_tnw * inp_sH + ix_tnw * inp_sW]; - gix -= tnw_val * (iy_bse - iy) * (iz_bse - iz) * gOut; - giy -= tnw_val * (ix_bse - ix) * (iz_bse - iz) * gOut; - giz -= tnw_val * (ix_bse - ix) * (iy_bse - iy) * gOut; - } - if (within_bounds_3d(iz_tne, iy_tne, ix_tne, inp_D, inp_H, inp_W)) { - scalar_t tne_val = inp_ptr_NC[iz_tne * inp_sD + iy_tne * inp_sH + ix_tne * inp_sW]; - gix += tne_val * (iy_bsw - iy) * (iz_bsw - iz) * gOut; - giy -= tne_val * (ix - ix_bsw) * (iz_bsw - iz) * gOut; - giz -= tne_val * (ix - ix_bsw) * (iy_bsw - iy) * gOut; - } - if (within_bounds_3d(iz_tsw, iy_tsw, ix_tsw, inp_D, inp_H, inp_W)) { - scalar_t tsw_val = inp_ptr_NC[iz_tsw * inp_sD + iy_tsw * inp_sH + ix_tsw * inp_sW]; - gix -= tsw_val * (iy - iy_bne) * (iz_bne - iz) * gOut; - giy += tsw_val * (ix_bne - ix) * (iz_bne - iz) * gOut; - giz -= tsw_val * (ix_bne - ix) * (iy - iy_bne) * gOut; - } - if (within_bounds_3d(iz_tse, iy_tse, ix_tse, inp_D, inp_H, inp_W)) { - scalar_t tse_val = inp_ptr_NC[iz_tse * inp_sD + iy_tse * inp_sH + ix_tse * inp_sW]; - gix += tse_val * (iy - iy_bnw) * (iz_bnw - iz) * gOut; - giy += tse_val * (ix - ix_bnw) * (iz_bnw - iz) * gOut; - giz -= tse_val * (ix - ix_bnw) * (iy - iy_bnw) * gOut; - } - if (within_bounds_3d(iz_bnw, iy_bnw, ix_bnw, inp_D, inp_H, inp_W)) { - scalar_t bnw_val = inp_ptr_NC[iz_bnw * inp_sD + iy_bnw * inp_sH + ix_bnw * inp_sW]; - gix -= bnw_val * (iy_tse - iy) * (iz - iz_tse) * gOut; - giy -= bnw_val * (ix_tse - ix) * (iz - iz_tse) * gOut; - giz += bnw_val * (ix_tse - ix) * (iy_tse - 
iy) * gOut; - } - if (within_bounds_3d(iz_bne, iy_bne, ix_bne, inp_D, inp_H, inp_W)) { - scalar_t bne_val = inp_ptr_NC[iz_bne * inp_sD + iy_bne * inp_sH + ix_bne * inp_sW]; - gix += bne_val * (iy_tsw - iy) * (iz - iz_tsw) * gOut; - giy -= bne_val * (ix - ix_tsw) * (iz - iz_tsw) * gOut; - giz += bne_val * (ix - ix_tsw) * (iy_tsw - iy) * gOut; - } - if (within_bounds_3d(iz_bsw, iy_bsw, ix_bsw, inp_D, inp_H, inp_W)) { - scalar_t bsw_val = inp_ptr_NC[iz_bsw * inp_sD + iy_bsw * inp_sH + ix_bsw * inp_sW]; - gix -= bsw_val * (iy - iy_tne) * (iz - iz_tne) * gOut; - giy += bsw_val * (ix_tne - ix) * (iz - iz_tne) * gOut; - giz += bsw_val * (ix_tne - ix) * (iy - iy_tne) * gOut; - } - if (within_bounds_3d(iz_bse, iy_bse, ix_bse, inp_D, inp_H, inp_W)) { - scalar_t bse_val = inp_ptr_NC[iz_bse * inp_sD + iy_bse * inp_sH + ix_bse * inp_sW]; - gix += bse_val * (iy - iy_tnw) * (iz - iz_tnw) * gOut; - giy += bse_val * (ix - ix_tnw) * (iz - iz_tnw) * gOut; - giz += bse_val * (ix - ix_tnw) * (iy - iy_tnw) * gOut; - } - } - - // assuming grad_grid is contiguous - gGrid_ptr_NDHW[0] = gix_mult * gix; - gGrid_ptr_NDHW[1] = giy_mult * giy; - gGrid_ptr_NDHW[2] = giz_mult * giz; - } else if (interpolation_mode == GridSamplerInterpolation::Nearest) { - int64_t ix_nearest = static_cast(std::round(ix)); - int64_t iy_nearest = static_cast(std::round(iy)); - int64_t iz_nearest = static_cast(std::round(iz)); - - // assign nearest neighor pixel value to output pixel - scalar_t *gOut_ptr_NCDHW = gOut_ptr + n * gOut_sN + d * gOut_sD + h * gOut_sH + w * gOut_sW; - scalar_t *gInp_ptr_NC = gInp_ptr + n * gInp_sN; - for (int c = 0; c < C; ++c, gOut_ptr_NCDHW += gOut_sC, gInp_ptr_NC += gInp_sC) { - // calculate and set grad_input - safe_add_3d(gInp_ptr_NC, iz_nearest, iy_nearest, ix_nearest, - gInp_sD, gInp_sH, gInp_sW, inp_D, inp_H, inp_W, *gOut_ptr_NCDHW); - } - } - } - } - } - } - return std::make_tuple(grad_input, grad_grid); - } - -} // namespace - -// No shape checking needed here. See # NOTE [ grid_sampler Native Functions ]. -Tensor grid_sampler_2d_forward_cpu(const Tensor& input, const Tensor& grid, - int64_t interpolation_mode, int64_t padding_mode, - bool align_corners) { - return AT_DISPATCH_FLOATING_TYPES(input.scalar_type(), "grid_sampler_2d_forward_cpu", [&] { - return grid_sampler_2d_forward_cpu_impl( - input, grid, static_cast(interpolation_mode), - static_cast(padding_mode), align_corners); - }); -} - -// No shape checking needed here. See # NOTE [ grid_sampler Native Functions ]. -Tensor grid_sampler_3d_forward_cpu(const Tensor& input, const Tensor& grid, - int64_t interpolation_mode, int64_t padding_mode, - bool align_corners) { - return AT_DISPATCH_FLOATING_TYPES(input.scalar_type(), "grid_sampler_3d_forward_cpu", [&] { - return grid_sampler_3d_forward_cpu_impl( - input, grid, static_cast(interpolation_mode), - static_cast(padding_mode), align_corners); - }); -} - -// No shape checking needed here. See # NOTE [ grid_sampler Native Functions ]. -std::tuple -grid_sampler_2d_backward_cpu(const Tensor& grad_output, const Tensor& input, const Tensor& grid, - int64_t interpolation_mode, int64_t padding_mode, bool align_corners) { - return AT_DISPATCH_FLOATING_TYPES(input.scalar_type(), "grid_sampler_2d_backward_cpu", [&] { - return grid_sampler_2d_backward_cpu_impl( - grad_output, input, grid, - static_cast(interpolation_mode), - static_cast(padding_mode), align_corners); - }); -} - -// No shape checking needed here. See # NOTE [ grid_sampler Native Functions ]. 
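// Illustrative aside (editor's sketch, not part of the patch): the *_cpu
// wrappers above and below use AT_DISPATCH_FLOATING_TYPES to pick a template
// instantiation of the _impl functions from the tensor's runtime dtype. A
// simplified, ATen-free sketch of that dispatch pattern with hypothetical
// names; the real macro is more general than this.
#include <cstdio>
#include <stdexcept>

enum class ScalarType { Float, Double };

// Stand-in for a templated kernel such as grid_sampler_2d_backward_cpu_impl.
template <typename scalar_t>
void kernel_impl(const char* name) {
  std::printf("%s instantiated for %zu-byte floats\n", name, sizeof(scalar_t));
}

// Stand-in for what the dispatch macro conceptually expands to: a switch on
// the runtime dtype that runs the body once with scalar_t bound.
void dispatch(ScalarType dtype, const char* name) {
  switch (dtype) {
    case ScalarType::Float:  kernel_impl<float>(name);  break;
    case ScalarType::Double: kernel_impl<double>(name); break;
    default: throw std::runtime_error("unsupported dtype");
  }
}

int main() {
  dispatch(ScalarType::Float, "grid_sampler_2d_backward_cpu");
  dispatch(ScalarType::Double, "grid_sampler_2d_backward_cpu");
  return 0;
}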
-std::tuple -grid_sampler_3d_backward_cpu(const Tensor& grad_output, const Tensor& input, const Tensor& grid, - int64_t interpolation_mode, int64_t padding_mode, bool align_corners) { - return AT_DISPATCH_FLOATING_TYPES(input.scalar_type(), "grid_sampler_3d_backward_cpu", [&] { - return grid_sampler_3d_backward_cpu_impl( - grad_output, input, grid, - static_cast(interpolation_mode), - static_cast(padding_mode), align_corners); - }); -} - -} // namespace mmdetection diff --git a/mmdet/ops/grid_sampler/src/cpu/grid_sampler_cpu.h b/mmdet/ops/grid_sampler/src/cpu/grid_sampler_cpu.h deleted file mode 100644 index 3c9ae45063b..00000000000 --- a/mmdet/ops/grid_sampler/src/cpu/grid_sampler_cpu.h +++ /dev/null @@ -1,225 +0,0 @@ -// Modified from https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/native/GridSampler.h - -#pragma once - -#include -#include - -namespace mmdetection { - -namespace detail { - - enum class GridSamplerInterpolation {Bilinear, Nearest}; - enum class GridSamplerPadding {Zeros, Border, Reflection}; - -} // namespace detail - -using detail::GridSamplerInterpolation; -using detail::GridSamplerPadding; - -// Unnormalizes a coordinate from the -1 to +1 scale to its pixel index value, -// where we view each pixel as an area between (idx - 0.5) and (idx + 0.5). -// if align_corners: -1 and +1 get sent to the centers of the corner pixels -// -1 --> 0 -// +1 --> (size - 1) -// scale_factor = (size - 1) / 2 -// if not align_corners: -1 and +1 get sent to the image edges -// -1 --> -0.5 -// +1 --> (size - 1) + 0.5 == size - 0.5 -// scale_factor = size / 2 -template -static inline scalar_t grid_sampler_unnormalize(scalar_t coord, int64_t size, - bool align_corners) { - if (align_corners) { - // unnormalize coord from [-1, 1] to [0, size - 1] - return ((coord + 1) / 2) * (size - 1); - } else { - // unnormalize coord from [-1, 1] to [-0.5, size - 0.5] - return ((coord + 1) * size - 1) / 2; - } -} - -// grid_sampler_unnormalize_set_grad works the same as grid_sampler_unnormalize -// except that it also returns the `d output / d input` via pointer argument -// `grad_in`. -// This is useful in the backward pass of grid_sampler. -template -static inline scalar_t grid_sampler_unnormalize_set_grad(scalar_t coord, int64_t size, - bool align_corners, scalar_t *grad_in) { - if (align_corners) { - // unnormalize coord from [-1, 1] to [0, size - 1] - *grad_in = static_cast(size - 1) / 2; - return ((coord + 1) / 2) * (size - 1); - } else { - // unnormalize coord from [-1, 1] to [-0.5, size - 0.5] - *grad_in = static_cast(size) / 2; - return ((coord + 1) * size - 1) / 2; - } -} - -// Clips coordinates to between 0 and clip_limit - 1 -template -static inline scalar_t clip_coordinates(scalar_t in, int64_t clip_limit) { - return std::min(static_cast(clip_limit - 1), std::max(in, static_cast(0))); -} - -// clip_coordinates_set_grad works similarly to clip_coordinates except that -// it also returns the `d output / d input` via pointer argument `grad_in`. -// This is useful in the backward pass of grid_sampler. -template -static inline scalar_t clip_coordinates_set_grad(scalar_t in, int64_t clip_limit, - scalar_t *grad_in) { - if (in < static_cast(0)) { - *grad_in = static_cast(0); - return static_cast(0); - } else { - scalar_t max = static_cast(clip_limit - 1); - if (in > max) { - *grad_in = static_cast(0); - return max; - } else { - *grad_in = static_cast(1); - return in; - } - } -} - -// Reflects coordinates until they fall between low and high (inclusive). 
-// The bounds are passed as twice their value so that half-integer values -// can be represented as ints. -template -static inline scalar_t reflect_coordinates(scalar_t in, int64_t twice_low, - int64_t twice_high) { - if (twice_low == twice_high) { - return static_cast(0); - } - scalar_t min = static_cast(twice_low) / 2; - scalar_t span = static_cast(twice_high - twice_low) / 2; - in = std::fabs(in - min); - // `fmod` returns same sign as `in`, which is positive after the `fabs` above. - scalar_t extra = std::fmod(in, span); - int flips = static_cast(std::floor(in / span)); - if (flips % 2 == 0) { - return extra + min; - } else { - return span - extra + min; - } -} - -// reflect_coordinates_set_grad works similarly to reflect_coordinates except -// that it also returns the `d output / d input` via pointer argument -// `grad_in`. -// This is useful in the backward pass of grid_sampler. -template -static inline scalar_t reflect_coordinates_set_grad(scalar_t in, int64_t twice_low, - int64_t twice_high, scalar_t *grad_in) { - if (twice_low == twice_high) { - *grad_in = static_cast(0); - return static_cast(0); - } - int grad_in_mult_; - scalar_t min = static_cast(twice_low) / 2; - scalar_t span = static_cast(twice_high - twice_low) / 2; - in = in - min; - if (in < static_cast(0)) { - grad_in_mult_ = -1; - in = -in; - } else { - grad_in_mult_ = 1; - } - // `fmod` returns same sign as `in`, which is positive after the `if` above. - scalar_t extra = std::fmod(in, span); - int flips = static_cast(std::floor(in / span)); - if (flips % 2 == 0) { - *grad_in = static_cast(grad_in_mult_); - return extra + min; - } else { - *grad_in = static_cast(-grad_in_mult_); - return span - extra + min; - } -} - -// Computes the pixel source index value for a grid coordinate -template -static inline scalar_t grid_sampler_compute_source_index( - scalar_t coord, - int64_t size, - GridSamplerPadding padding_mode, - bool align_corners) { - coord = grid_sampler_unnormalize(coord, size, align_corners); - if (padding_mode == GridSamplerPadding::Border) { - // clip coordinates to image borders - coord = clip_coordinates(coord, size); - } else if (padding_mode == GridSamplerPadding::Reflection) { - // reflect coordinates by image borders - if (align_corners) { - coord = reflect_coordinates(coord, 0, 2*(size - 1)); - } else { - coord = reflect_coordinates(coord, -1, 2*size - 1); - // when align_corners=False, reflection does not auto clip coords - coord = clip_coordinates(coord, size); - } - } - return coord; -} - -// grid_sampler_compute_source_index_set_grad works similarly to -// grid_sampler_compute_source_index except that it also returns the -// `d output / d input` via pointer argument `grad_in`. -// This is useful in the backward pass of grid_sampler. 
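// Illustrative aside (editor's sketch, not part of the patch):
// reflect_coordinates above takes its bounds doubled so that the half-integer
// bounds used when align_corners=False stay representable as integers. A
// standalone restatement in double precision, with two sample mappings.
#include <cmath>
#include <cstdio>

static double reflect(double in, int twice_low, int twice_high) {
  if (twice_low == twice_high) return 0.0;
  double lo = twice_low / 2.0;
  double span = (twice_high - twice_low) / 2.0;
  in = std::fabs(in - lo);
  double extra = std::fmod(in, span);
  int flips = static_cast<int>(std::floor(in / span));
  return (flips % 2 == 0) ? extra + lo : span - extra + lo;
}

int main() {
  // align_corners=True, size=5: reflect about the corner centers [0, 4]
  std::printf("%.2f\n", reflect(5.5, 0, 2 * (5 - 1)));   // prints 2.50
  // align_corners=False, size=5: reflect about the image edges [-0.5, 4.5]
  std::printf("%.2f\n", reflect(-1.25, -1, 2 * 5 - 1));  // prints 0.25
  return 0;
}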
-template -static inline scalar_t grid_sampler_compute_source_index_set_grad( - scalar_t coord, - int64_t size, - GridSamplerPadding padding_mode, - bool align_corners, - scalar_t *grad_in) { - scalar_t grad_clip, grad_refl; - coord = grid_sampler_unnormalize_set_grad(coord, size, align_corners, grad_in); - if (padding_mode == GridSamplerPadding::Border) { - // clip coordinates to image borders - coord = clip_coordinates_set_grad(coord, size, &grad_clip); - *grad_in = (*grad_in) * grad_clip; - } else if (padding_mode == GridSamplerPadding::Reflection) { - // reflect coordinates by image borders - if (align_corners) { - coord = reflect_coordinates_set_grad(coord, 0, 2*(size - 1), &grad_refl); - *grad_in = (*grad_in) * grad_refl; - } else { - coord = reflect_coordinates_set_grad(coord, -1, 2*size - 1, &grad_refl); - // when align_corners=False, reflection does not auto clip coords - coord = clip_coordinates_set_grad(coord, size, &grad_clip); - *grad_in = (*grad_in) * grad_refl * grad_clip; - } - } - return coord; -} - -static inline bool within_bounds_2d(int64_t h, int64_t w, int64_t H, int64_t W) { - return h >= 0 && h < H && w >= 0 && w < W; -} - -static inline bool within_bounds_3d(int64_t d, int64_t h, int64_t w, int64_t D, int64_t H, int64_t W) { - return d >= 0 && d < D && h >= 0 && h < H && w >= 0 && w < W; -} - -template -static inline void safe_add_2d(scalar_t *data, int64_t h, int64_t w, - int64_t sH, int64_t sW, int64_t H, int64_t W, - scalar_t delta) { - if (within_bounds_2d(h, w, H, W)) { - data[h * sH + w * sW] += delta; - } -} - -template -static inline void safe_add_3d(scalar_t *data, int64_t d, int64_t h, int64_t w, - int64_t sD, int64_t sH, int64_t sW, - int64_t D, int64_t H, int64_t W, - scalar_t delta) { - if (within_bounds_3d(d, h, w, D, H, W)) { - data[d * sD + h * sH + w * sW] += delta; - } -} - -} // namespace mmdetection diff --git a/mmdet/ops/grid_sampler/src/cuda/grid_sampler_cuda.cu b/mmdet/ops/grid_sampler/src/cuda/grid_sampler_cuda.cu deleted file mode 100644 index 2d747a0b897..00000000000 --- a/mmdet/ops/grid_sampler/src/cuda/grid_sampler_cuda.cu +++ /dev/null @@ -1,718 +0,0 @@ -// Modified from https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/native/cuda/GridSampler.cu - -#include -#include "grid_sampler_cuda.cuh" -#include -#include -#include -#include -#include -#include - -namespace mmdetection { - -using namespace at::cuda::detail; - -using mmdetection::detail::GridSamplerInterpolation; -using mmdetection::detail::GridSamplerPadding; - -namespace { - template - C10_LAUNCH_BOUNDS_1(1024) - __global__ void grid_sampler_2d_forward_kernel_cuda( - const int nthreads, - TensorInfo input, - TensorInfo grid, - TensorInfo output, - const GridSamplerInterpolation interpolation_mode, - const GridSamplerPadding padding_mode, - bool align_corners) { - - int C = input.sizes[1]; - int inp_H = input.sizes[2]; - int inp_W = input.sizes[3]; - int out_H = grid.sizes[1]; - int out_W = grid.sizes[2]; - int inp_sN = input.strides[0]; - int inp_sC = input.strides[1]; - int inp_sH = input.strides[2]; - int inp_sW = input.strides[3]; - int grid_sN = grid.strides[0]; - int grid_sH = grid.strides[1]; - int grid_sW = grid.strides[2]; - int grid_sCoor = grid.strides[3]; - int out_sN = output.strides[0]; - int out_sC = output.strides[1]; - int out_sH = output.strides[2]; - int out_sW = output.strides[3]; - - CUDA_KERNEL_LOOP(index, nthreads) { - const int w = index % out_W; - const int h = (index / out_W) % out_H; - const int n = index / (out_H * out_W); - const int 
grid_offset = n * grid_sN + h * grid_sH + w * grid_sW; - - // get the corresponding input x, y co-ordinates from grid - scalar_t ix = grid.data[grid_offset]; - scalar_t iy = grid.data[grid_offset + grid_sCoor]; - - ix = grid_sampler_compute_source_index(ix, inp_W, padding_mode, align_corners); - iy = grid_sampler_compute_source_index(iy, inp_H, padding_mode, align_corners); - - if (interpolation_mode == GridSamplerInterpolation::Bilinear) { - // get NE, NW, SE, SW pixel values from (x, y) - int ix_nw = static_cast(::floor(ix)); - int iy_nw = static_cast(::floor(iy)); - int ix_ne = ix_nw + 1; - int iy_ne = iy_nw; - int ix_sw = ix_nw; - int iy_sw = iy_nw + 1; - int ix_se = ix_nw + 1; - int iy_se = iy_nw + 1; - - // get surfaces to each neighbor: - scalar_t nw = (ix_se - ix) * (iy_se - iy); - scalar_t ne = (ix - ix_sw) * (iy_sw - iy); - scalar_t sw = (ix_ne - ix) * (iy - iy_ne); - scalar_t se = (ix - ix_nw) * (iy - iy_nw); - - // calculate bilinear weighted pixel value and set output pixel - auto inp_ptr_NC = input.data + n * inp_sN; - auto out_ptr_NCHW = output.data + n * out_sN + h * out_sH + w * out_sW; - for (int c = 0; c < C; ++c, inp_ptr_NC += inp_sC, out_ptr_NCHW += out_sC) { - *out_ptr_NCHW = static_cast(0); - if (within_bounds_2d(iy_nw, ix_nw, inp_H, inp_W)) { - *out_ptr_NCHW += inp_ptr_NC[iy_nw * inp_sH + ix_nw * inp_sW] * nw; - } - if (within_bounds_2d(iy_ne, ix_ne, inp_H, inp_W)) { - *out_ptr_NCHW += inp_ptr_NC[iy_ne * inp_sH + ix_ne * inp_sW] * ne; - } - if (within_bounds_2d(iy_sw, ix_sw, inp_H, inp_W)) { - *out_ptr_NCHW += inp_ptr_NC[iy_sw * inp_sH + ix_sw * inp_sW] * sw; - } - if (within_bounds_2d(iy_se, ix_se, inp_H, inp_W)) { - *out_ptr_NCHW += inp_ptr_NC[iy_se * inp_sH + ix_se * inp_sW] * se; - } - } - } else if (interpolation_mode == GridSamplerInterpolation::Nearest) { - int ix_nearest = static_cast(::round(ix)); - int iy_nearest = static_cast(::round(iy)); - - // assign nearest neighor pixel value to output pixel - auto inp_ptr_NC = input.data + n * inp_sN; - auto out_ptr_NCHW = output.data + n * out_sN + h * out_sH + w * out_sW; - for (int c = 0; c < C; ++c, inp_ptr_NC += inp_sC, out_ptr_NCHW += out_sC) { - if (within_bounds_2d(iy_nearest, ix_nearest, inp_H, inp_W)) { - *out_ptr_NCHW = inp_ptr_NC[iy_nearest * inp_sH + ix_nearest * inp_sW]; - } else { - *out_ptr_NCHW = static_cast(0); - } - } - } - } - } - - template - C10_LAUNCH_BOUNDS_1(1024) - __global__ void grid_sampler_3d_forward_kernel_cuda( - const int nthreads, - TensorInfo input, - TensorInfo grid, - TensorInfo output, - const GridSamplerInterpolation interpolation_mode, - const GridSamplerPadding padding_mode, - bool align_corners) { - - int C = input.sizes[1]; - int inp_D = input.sizes[2]; - int inp_H = input.sizes[3]; - int inp_W = input.sizes[4]; - int out_D = grid.sizes[1]; - int out_H = grid.sizes[2]; - int out_W = grid.sizes[3]; - int inp_sN = input.strides[0]; - int inp_sC = input.strides[1]; - int inp_sD = input.strides[2]; - int inp_sH = input.strides[3]; - int inp_sW = input.strides[4]; - int grid_sN = grid.strides[0]; - int grid_sD = grid.strides[1]; - int grid_sH = grid.strides[2]; - int grid_sW = grid.strides[3]; - int grid_sCoor = grid.strides[4]; - int out_sN = output.strides[0]; - int out_sC = output.strides[1]; - int out_sD = output.strides[2]; - int out_sH = output.strides[3]; - int out_sW = output.strides[4]; - - CUDA_KERNEL_LOOP(index, nthreads) { - const int w = index % out_W; - const int h = (index / out_W) % out_H; - const int d = (index / (out_H * out_W)) % out_D; - const int n = index / 
(out_D * out_H * out_W); - const int grid_offset = n * grid_sN + d * grid_sD + h * grid_sH + w * grid_sW; - - // get the corresponding input x, y, z co-ordinates from grid - scalar_t ix = grid.data[grid_offset]; - scalar_t iy = grid.data[grid_offset + grid_sCoor]; - scalar_t iz = grid.data[grid_offset + 2 * grid_sCoor]; - - ix = grid_sampler_compute_source_index(ix, inp_W, padding_mode, align_corners); - iy = grid_sampler_compute_source_index(iy, inp_H, padding_mode, align_corners); - iz = grid_sampler_compute_source_index(iz, inp_D, padding_mode, align_corners); - - if (interpolation_mode == GridSamplerInterpolation::Bilinear) { - // get corner pixel values from (x, y, z) - // for 4d, we used north-east-south-west - // for 5d, we add top-bottom - int ix_tnw = static_cast(::floor(ix)); - int iy_tnw = static_cast(::floor(iy)); - int iz_tnw = static_cast(::floor(iz)); - - int ix_tne = ix_tnw + 1; - int iy_tne = iy_tnw; - int iz_tne = iz_tnw; - - int ix_tsw = ix_tnw; - int iy_tsw = iy_tnw + 1; - int iz_tsw = iz_tnw; - - int ix_tse = ix_tnw + 1; - int iy_tse = iy_tnw + 1; - int iz_tse = iz_tnw; - - int ix_bnw = ix_tnw; - int iy_bnw = iy_tnw; - int iz_bnw = iz_tnw + 1; - - int ix_bne = ix_tnw + 1; - int iy_bne = iy_tnw; - int iz_bne = iz_tnw + 1; - - int ix_bsw = ix_tnw; - int iy_bsw = iy_tnw + 1; - int iz_bsw = iz_tnw + 1; - - int ix_bse = ix_tnw + 1; - int iy_bse = iy_tnw + 1; - int iz_bse = iz_tnw + 1; - - // get surfaces to each neighbor: - scalar_t tnw = (ix_bse - ix) * (iy_bse - iy) * (iz_bse - iz); - scalar_t tne = (ix - ix_bsw) * (iy_bsw - iy) * (iz_bsw - iz); - scalar_t tsw = (ix_bne - ix) * (iy - iy_bne) * (iz_bne - iz); - scalar_t tse = (ix - ix_bnw) * (iy - iy_bnw) * (iz_bnw - iz); - scalar_t bnw = (ix_tse - ix) * (iy_tse - iy) * (iz - iz_tse); - scalar_t bne = (ix - ix_tsw) * (iy_tsw - iy) * (iz - iz_tsw); - scalar_t bsw = (ix_tne - ix) * (iy - iy_tne) * (iz - iz_tne); - scalar_t bse = (ix - ix_tnw) * (iy - iy_tnw) * (iz - iz_tnw); - - auto inp_ptr_NC = input.data + n * inp_sN; - auto out_ptr_NCDHW = output.data + n * out_sN + d * out_sD + h * out_sH + w * out_sW; - for (int c = 0; c < C; ++c, inp_ptr_NC += inp_sC, out_ptr_NCDHW += out_sC) { - // (c, iz_tnw, iy_tnw, ix_tnw) * tnw + (c, iz_tne, iy_tne, ix_tne) * tne - // + (c, iz_tsw, iy_tsw, ix_tsw) * tsw + (c, iz_tse, iy_tse, ix_tse) * tse - // + (c, iz_bnw, iy_bnw, ix_bnw) * bnw + (c, iz_bne, iy_bne, ix_bne) * bne - // + (c, iz_bsw, iy_bsw, ix_bsw) * bsw + (c, iz_bse, iy_bse, ix_bse) * bse - *out_ptr_NCDHW = static_cast(0); - if (within_bounds_3d(iz_tnw, iy_tnw, ix_tnw, inp_D, inp_H, inp_W)) { - *out_ptr_NCDHW += inp_ptr_NC[iz_tnw * inp_sD + iy_tnw * inp_sH + ix_tnw * inp_sW] * tnw; - } - if (within_bounds_3d(iz_tne, iy_tne, ix_tne, inp_D, inp_H, inp_W)) { - *out_ptr_NCDHW += inp_ptr_NC[iz_tne * inp_sD + iy_tne * inp_sH + ix_tne * inp_sW] * tne; - } - if (within_bounds_3d(iz_tsw, iy_tsw, ix_tsw, inp_D, inp_H, inp_W)) { - *out_ptr_NCDHW += inp_ptr_NC[iz_tsw * inp_sD + iy_tsw * inp_sH + ix_tsw * inp_sW] * tsw; - } - if (within_bounds_3d(iz_tse, iy_tse, ix_tse, inp_D, inp_H, inp_W)) { - *out_ptr_NCDHW += inp_ptr_NC[iz_tse * inp_sD + iy_tse * inp_sH + ix_tse * inp_sW] * tse; - } - if (within_bounds_3d(iz_bnw, iy_bnw, ix_bnw, inp_D, inp_H, inp_W)) { - *out_ptr_NCDHW += inp_ptr_NC[iz_bnw * inp_sD + iy_bnw * inp_sH + ix_bnw * inp_sW] * bnw; - } - if (within_bounds_3d(iz_bne, iy_bne, ix_bne, inp_D, inp_H, inp_W)) { - *out_ptr_NCDHW += inp_ptr_NC[iz_bne * inp_sD + iy_bne * inp_sH + ix_bne * inp_sW] * bne; - } - if 
(within_bounds_3d(iz_bsw, iy_bsw, ix_bsw, inp_D, inp_H, inp_W)) { - *out_ptr_NCDHW += inp_ptr_NC[iz_bsw * inp_sD + iy_bsw * inp_sH + ix_bsw * inp_sW] * bsw; - } - if (within_bounds_3d(iz_bse, iy_bse, ix_bse, inp_D, inp_H, inp_W)) { - *out_ptr_NCDHW += inp_ptr_NC[iz_bse * inp_sD + iy_bse * inp_sH + ix_bse * inp_sW] * bse; - } - } - } else if (interpolation_mode == GridSamplerInterpolation::Nearest) { - int ix_nearest = static_cast(::round(ix)); - int iy_nearest = static_cast(::round(iy)); - int iz_nearest = static_cast(::round(iz)); - - // assign nearest neighor pixel value to output pixel - auto inp_ptr_NC = input.data + n * inp_sN; - auto out_ptr_NCDHW = output.data + n * out_sN + d * out_sD + h * out_sH + w * out_sW; - for (int c = 0; c < C; ++c, inp_ptr_NC += inp_sC, out_ptr_NCDHW += out_sC) { - if (within_bounds_3d(iz_nearest, iy_nearest, ix_nearest, inp_D, inp_H, inp_W)) { - *out_ptr_NCDHW = inp_ptr_NC[iz_nearest * inp_sD + iy_nearest * inp_sH + ix_nearest * inp_sW]; - } else { - *out_ptr_NCDHW = static_cast(0); - } - } - } - } - } - - template - C10_LAUNCH_BOUNDS_1(1024) - __global__ void grid_sampler_2d_backward_kernel_cuda( - const int nthreads, - TensorInfo grad_output, - TensorInfo input, - TensorInfo grid, - TensorInfo grad_input, // initialized to zeros - TensorInfo grad_grid, // initialized to empty - const GridSamplerInterpolation interpolation_mode, - const GridSamplerPadding padding_mode, - bool align_corners) { - - int C = input.sizes[1]; - int inp_H = input.sizes[2]; - int inp_W = input.sizes[3]; - int out_H = grid.sizes[1]; - int out_W = grid.sizes[2]; - int inp_sN = input.strides[0]; - int inp_sC = input.strides[1]; - int inp_sH = input.strides[2]; - int inp_sW = input.strides[3]; - int grid_sN = grid.strides[0]; - int grid_sH = grid.strides[1]; - int grid_sW = grid.strides[2]; - int grid_sCoor = grid.strides[3]; - int gOut_sN = grad_output.strides[0]; - int gOut_sC = grad_output.strides[1]; - int gOut_sH = grad_output.strides[2]; - int gOut_sW = grad_output.strides[3]; - int gInp_sN = grad_input.strides[0]; - int gInp_sC = grad_input.strides[1]; - int gInp_sH = grad_input.strides[2]; - int gInp_sW = grad_input.strides[3]; - int gGrid_sW = grad_grid.strides[2]; - - CUDA_KERNEL_LOOP(index, nthreads) { - const int w = index % out_W; - const int h = (index / out_W) % out_H; - const int n = index / (out_H * out_W); - const int grid_offset = n * grid_sN + h * grid_sH + w * grid_sW; - - // get the corresponding input x, y co-ordinates from grid - scalar_t ix = grid.data[grid_offset]; - scalar_t iy = grid.data[grid_offset + grid_sCoor]; - - // multipliers for gradients on ix and iy - scalar_t gix_mult, giy_mult; - ix = grid_sampler_compute_source_index_set_grad(ix, inp_W, padding_mode, align_corners, &gix_mult); - iy = grid_sampler_compute_source_index_set_grad(iy, inp_H, padding_mode, align_corners, &giy_mult); - - if (interpolation_mode == GridSamplerInterpolation::Bilinear) { - // get NE, NW, SE, SW pixel values from (x, y) - int ix_nw = static_cast(::floor(ix)); - int iy_nw = static_cast(::floor(iy)); - int ix_ne = ix_nw + 1; - int iy_ne = iy_nw; - int ix_sw = ix_nw; - int iy_sw = iy_nw + 1; - int ix_se = ix_nw + 1; - int iy_se = iy_nw + 1; - - // get surfaces to each neighbor: - scalar_t nw = (ix_se - ix) * (iy_se - iy); - scalar_t ne = (ix - ix_sw) * (iy_sw - iy); - scalar_t sw = (ix_ne - ix) * (iy - iy_ne); - scalar_t se = (ix - ix_nw) * (iy - iy_nw); - - scalar_t gix = static_cast(0), giy = static_cast(0); - scalar_t *gOut_ptr_NCHW = grad_output.data + n * gOut_sN + h 
* gOut_sH + w * gOut_sW; - scalar_t *gInp_ptr_NC = grad_input.data + n * gInp_sN; - scalar_t *inp_ptr_NC = input.data + n * inp_sN; - for (int c = 0; c < C; ++c, inp_ptr_NC += inp_sC, gInp_ptr_NC += gInp_sC, gOut_ptr_NCHW += gOut_sC) { - scalar_t gOut = *gOut_ptr_NCHW; - - // calculate and set grad_input - safe_add_2d(gInp_ptr_NC, iy_nw, ix_nw, gInp_sH, gInp_sW, inp_H, inp_W, nw * gOut); - safe_add_2d(gInp_ptr_NC, iy_ne, ix_ne, gInp_sH, gInp_sW, inp_H, inp_W, ne * gOut); - safe_add_2d(gInp_ptr_NC, iy_sw, ix_sw, gInp_sH, gInp_sW, inp_H, inp_W, sw * gOut); - safe_add_2d(gInp_ptr_NC, iy_se, ix_se, gInp_sH, gInp_sW, inp_H, inp_W, se * gOut); - - // calculate grad_grid - if (within_bounds_2d(iy_nw, ix_nw, inp_H, inp_W)) { - scalar_t nw_val = inp_ptr_NC[iy_nw * inp_sH + ix_nw * inp_sW]; - gix -= nw_val * (iy_se - iy) * gOut; - giy -= nw_val * (ix_se - ix) * gOut; - } - if (within_bounds_2d(iy_ne, ix_ne, inp_H, inp_W)) { - scalar_t ne_val = inp_ptr_NC[iy_ne * inp_sH + ix_ne * inp_sW]; - gix += ne_val * (iy_sw - iy) * gOut; - giy -= ne_val * (ix - ix_sw) * gOut; - } - if (within_bounds_2d(iy_sw, ix_sw, inp_H, inp_W)) { - scalar_t sw_val = inp_ptr_NC[iy_sw * inp_sH + ix_sw * inp_sW]; - gix -= sw_val * (iy - iy_ne) * gOut; - giy += sw_val * (ix_ne - ix) * gOut; - } - if (within_bounds_2d(iy_se, ix_se, inp_H, inp_W)) { - scalar_t se_val = inp_ptr_NC[iy_se * inp_sH + ix_se * inp_sW]; - gix += se_val * (iy - iy_nw) * gOut; - giy += se_val * (ix - ix_nw) * gOut; - } - } - - // assuming grad_grid is contiguous - // thus we can - // 1. use index with gGrid_sW to directly compute gGrid_ptr_NHW - // 2. directly assign to gGrid_ptr_NHW[0], gGrid_ptr_NHW[1] - scalar_t *gGrid_ptr_NHW = grad_grid.data + index * gGrid_sW; - gGrid_ptr_NHW[0] = gix_mult * gix; - gGrid_ptr_NHW[1] = giy_mult * giy; - } else if (interpolation_mode == GridSamplerInterpolation::Nearest) { - int ix_nearest = static_cast(::round(ix)); - int iy_nearest = static_cast(::round(iy)); - - // assign nearest neighor pixel value to output pixel - scalar_t *gOut_ptr_NCHW = grad_output.data + n * gOut_sN + h * gOut_sH + w * gOut_sW; - scalar_t *gInp_ptr_NC = grad_input.data + n * gInp_sN; - for (int c = 0; c < C; ++c, gInp_ptr_NC += gInp_sC, gOut_ptr_NCHW += gOut_sC) { - // calculate and set grad_input - safe_add_2d(gInp_ptr_NC, iy_nearest, ix_nearest, gInp_sH, gInp_sW, inp_H, inp_W, *gOut_ptr_NCHW); - } - - // assuming grad_grid is contiguous - // thus we can - // 1. use index with gGrid_sW to directly compute gGrid_ptr_NHW - // 2. 
directly assign to gGrid_ptr_NHW[0], gGrid_ptr_NHW[1] - scalar_t *gGrid_ptr_NHW = grad_grid.data + index * gGrid_sW; - gGrid_ptr_NHW[0] = static_cast(0); - gGrid_ptr_NHW[1] = static_cast(0); - } - } - } - - template - C10_LAUNCH_BOUNDS_1(1024) - __global__ void grid_sampler_3d_backward_kernel_cuda( - const int nthreads, - TensorInfo grad_output, - TensorInfo input, - TensorInfo grid, - TensorInfo grad_input, // initialized to zeros - TensorInfo grad_grid, // initialized to empty - const GridSamplerInterpolation interpolation_mode, - const GridSamplerPadding padding_mode, - bool align_corners) { - - int C = input.sizes[1]; - int inp_D = input.sizes[2]; - int inp_H = input.sizes[3]; - int inp_W = input.sizes[4]; - int out_D = grid.sizes[1]; - int out_H = grid.sizes[2]; - int out_W = grid.sizes[3]; - int inp_sN = input.strides[0]; - int inp_sC = input.strides[1]; - int inp_sD = input.strides[2]; - int inp_sH = input.strides[3]; - int inp_sW = input.strides[4]; - int grid_sN = grid.strides[0]; - int grid_sD = grid.strides[1]; - int grid_sH = grid.strides[2]; - int grid_sW = grid.strides[3]; - int grid_sCoor = grid.strides[4]; - int gOut_sN = grad_output.strides[0]; - int gOut_sC = grad_output.strides[1]; - int gOut_sD = grad_output.strides[2]; - int gOut_sH = grad_output.strides[3]; - int gOut_sW = grad_output.strides[4]; - int gInp_sN = grad_input.strides[0]; - int gInp_sC = grad_input.strides[1]; - int gInp_sD = grad_input.strides[2]; - int gInp_sH = grad_input.strides[3]; - int gInp_sW = grad_input.strides[4]; - int gGrid_sW = grad_grid.strides[3]; - - CUDA_KERNEL_LOOP(index, nthreads) { - const int w = index % out_W; - const int h = (index / out_W) % out_H; - const int d = (index / (out_H * out_W)) % out_D; - const int n = index / (out_D * out_H * out_W); - const int grid_offset = n * grid_sN + d * grid_sD + h * grid_sH + w * grid_sW; - - // get the corresponding input x, y, z co-ordinates from grid - scalar_t ix = grid.data[grid_offset]; - scalar_t iy = grid.data[grid_offset + grid_sCoor]; - scalar_t iz = grid.data[grid_offset + 2 * grid_sCoor]; - - // multipliers for gradients on ix, iy, and iz - scalar_t gix_mult, giy_mult, giz_mult; - ix = grid_sampler_compute_source_index_set_grad(ix, inp_W, padding_mode, align_corners, &gix_mult); - iy = grid_sampler_compute_source_index_set_grad(iy, inp_H, padding_mode, align_corners, &giy_mult); - iz = grid_sampler_compute_source_index_set_grad(iz, inp_D, padding_mode, align_corners, &giz_mult); - - if (interpolation_mode == GridSamplerInterpolation::Bilinear) { - // get corner pixel values from (x, y, z) - // for 4d, we used north-east-south-west - // for 5d, we add top-bottom - int ix_tnw = static_cast(::floor(ix)); - int iy_tnw = static_cast(::floor(iy)); - int iz_tnw = static_cast(::floor(iz)); - - int ix_tne = ix_tnw + 1; - int iy_tne = iy_tnw; - int iz_tne = iz_tnw; - - int ix_tsw = ix_tnw; - int iy_tsw = iy_tnw + 1; - int iz_tsw = iz_tnw; - - int ix_tse = ix_tnw + 1; - int iy_tse = iy_tnw + 1; - int iz_tse = iz_tnw; - - int ix_bnw = ix_tnw; - int iy_bnw = iy_tnw; - int iz_bnw = iz_tnw + 1; - - int ix_bne = ix_tnw + 1; - int iy_bne = iy_tnw; - int iz_bne = iz_tnw + 1; - - int ix_bsw = ix_tnw; - int iy_bsw = iy_tnw + 1; - int iz_bsw = iz_tnw + 1; - - int ix_bse = ix_tnw + 1; - int iy_bse = iy_tnw + 1; - int iz_bse = iz_tnw + 1; - - // get surfaces to each neighbor: - scalar_t tnw = (ix_bse - ix) * (iy_bse - iy) * (iz_bse - iz); - scalar_t tne = (ix - ix_bsw) * (iy_bsw - iy) * (iz_bsw - iz); - scalar_t tsw = (ix_bne - ix) * (iy - iy_bne) * 
(iz_bne - iz); - scalar_t tse = (ix - ix_bnw) * (iy - iy_bnw) * (iz_bnw - iz); - scalar_t bnw = (ix_tse - ix) * (iy_tse - iy) * (iz - iz_tse); - scalar_t bne = (ix - ix_tsw) * (iy_tsw - iy) * (iz - iz_tsw); - scalar_t bsw = (ix_tne - ix) * (iy - iy_tne) * (iz - iz_tne); - scalar_t bse = (ix - ix_tnw) * (iy - iy_tnw) * (iz - iz_tnw); - - scalar_t gix = static_cast(0), giy = static_cast(0), giz = static_cast(0); - scalar_t *gOut_ptr_NCDHW = grad_output.data + n * gOut_sN + d * gOut_sD + h * gOut_sH + w * gOut_sW; - scalar_t *gInp_ptr_NC = grad_input.data + n * gInp_sN; - scalar_t *inp_ptr_NC = input.data + n * inp_sN; - // calculate bilinear weighted pixel value and set output pixel - for (int c = 0; c < C; ++c, gOut_ptr_NCDHW += gOut_sC, gInp_ptr_NC += gInp_sC, inp_ptr_NC += inp_sC) { - scalar_t gOut = *gOut_ptr_NCDHW; - - // calculate and set grad_input - safe_add_3d(gInp_ptr_NC, iz_tnw, iy_tnw, ix_tnw, gInp_sD, gInp_sH, gInp_sW, inp_D, inp_H, inp_W, tnw * gOut); - safe_add_3d(gInp_ptr_NC, iz_tne, iy_tne, ix_tne, gInp_sD, gInp_sH, gInp_sW, inp_D, inp_H, inp_W, tne * gOut); - safe_add_3d(gInp_ptr_NC, iz_tsw, iy_tsw, ix_tsw, gInp_sD, gInp_sH, gInp_sW, inp_D, inp_H, inp_W, tsw * gOut); - safe_add_3d(gInp_ptr_NC, iz_tse, iy_tse, ix_tse, gInp_sD, gInp_sH, gInp_sW, inp_D, inp_H, inp_W, tse * gOut); - safe_add_3d(gInp_ptr_NC, iz_bnw, iy_bnw, ix_bnw, gInp_sD, gInp_sH, gInp_sW, inp_D, inp_H, inp_W, bnw * gOut); - safe_add_3d(gInp_ptr_NC, iz_bne, iy_bne, ix_bne, gInp_sD, gInp_sH, gInp_sW, inp_D, inp_H, inp_W, bne * gOut); - safe_add_3d(gInp_ptr_NC, iz_bsw, iy_bsw, ix_bsw, gInp_sD, gInp_sH, gInp_sW, inp_D, inp_H, inp_W, bsw * gOut); - safe_add_3d(gInp_ptr_NC, iz_bse, iy_bse, ix_bse, gInp_sD, gInp_sH, gInp_sW, inp_D, inp_H, inp_W, bse * gOut); - - // calculate grad_grid - if (within_bounds_3d(iz_tnw, iy_tnw, ix_tnw, inp_D, inp_H, inp_W)) { - scalar_t tnw_val = inp_ptr_NC[iz_tnw * inp_sD + iy_tnw * inp_sH + ix_tnw * inp_sW]; - gix -= tnw_val * (iy_bse - iy) * (iz_bse - iz) * gOut; - giy -= tnw_val * (ix_bse - ix) * (iz_bse - iz) * gOut; - giz -= tnw_val * (ix_bse - ix) * (iy_bse - iy) * gOut; - } - if (within_bounds_3d(iz_tne, iy_tne, ix_tne, inp_D, inp_H, inp_W)) { - scalar_t tne_val = inp_ptr_NC[iz_tne * inp_sD + iy_tne * inp_sH + ix_tne * inp_sW]; - gix += tne_val * (iy_bsw - iy) * (iz_bsw - iz) * gOut; - giy -= tne_val * (ix - ix_bsw) * (iz_bsw - iz) * gOut; - giz -= tne_val * (ix - ix_bsw) * (iy_bsw - iy) * gOut; - } - if (within_bounds_3d(iz_tsw, iy_tsw, ix_tsw, inp_D, inp_H, inp_W)) { - scalar_t tsw_val = inp_ptr_NC[iz_tsw * inp_sD + iy_tsw * inp_sH + ix_tsw * inp_sW]; - gix -= tsw_val * (iy - iy_bne) * (iz_bne - iz) * gOut; - giy += tsw_val * (ix_bne - ix) * (iz_bne - iz) * gOut; - giz -= tsw_val * (ix_bne - ix) * (iy - iy_bne) * gOut; - } - if (within_bounds_3d(iz_tse, iy_tse, ix_tse, inp_D, inp_H, inp_W)) { - scalar_t tse_val = inp_ptr_NC[iz_tse * inp_sD + iy_tse * inp_sH + ix_tse * inp_sW]; - gix += tse_val * (iy - iy_bnw) * (iz_bnw - iz) * gOut; - giy += tse_val * (ix - ix_bnw) * (iz_bnw - iz) * gOut; - giz -= tse_val * (ix - ix_bnw) * (iy - iy_bnw) * gOut; - } - if (within_bounds_3d(iz_bnw, iy_bnw, ix_bnw, inp_D, inp_H, inp_W)) { - scalar_t bnw_val = inp_ptr_NC[iz_bnw * inp_sD + iy_bnw * inp_sH + ix_bnw * inp_sW]; - gix -= bnw_val * (iy_tse - iy) * (iz - iz_tse) * gOut; - giy -= bnw_val * (ix_tse - ix) * (iz - iz_tse) * gOut; - giz += bnw_val * (ix_tse - ix) * (iy_tse - iy) * gOut; - } - if (within_bounds_3d(iz_bne, iy_bne, ix_bne, inp_D, inp_H, inp_W)) { - scalar_t bne_val = 
inp_ptr_NC[iz_bne * inp_sD + iy_bne * inp_sH + ix_bne * inp_sW]; - gix += bne_val * (iy_tsw - iy) * (iz - iz_tsw) * gOut; - giy -= bne_val * (ix - ix_tsw) * (iz - iz_tsw) * gOut; - giz += bne_val * (ix - ix_tsw) * (iy_tsw - iy) * gOut; - } - if (within_bounds_3d(iz_bsw, iy_bsw, ix_bsw, inp_D, inp_H, inp_W)) { - scalar_t bsw_val = inp_ptr_NC[iz_bsw * inp_sD + iy_bsw * inp_sH + ix_bsw * inp_sW]; - gix -= bsw_val * (iy - iy_tne) * (iz - iz_tne) * gOut; - giy += bsw_val * (ix_tne - ix) * (iz - iz_tne) * gOut; - giz += bsw_val * (ix_tne - ix) * (iy - iy_tne) * gOut; - } - if (within_bounds_3d(iz_bse, iy_bse, ix_bse, inp_D, inp_H, inp_W)) { - scalar_t bse_val = inp_ptr_NC[iz_bse * inp_sD + iy_bse * inp_sH + ix_bse * inp_sW]; - gix += bse_val * (iy - iy_tnw) * (iz - iz_tnw) * gOut; - giy += bse_val * (ix - ix_tnw) * (iz - iz_tnw) * gOut; - giz += bse_val * (ix - ix_tnw) * (iy - iy_tnw) * gOut; - } - } - - // assuming grad_grid is contiguous - // thus we can - // 1. use index with gGrid_sW to directly compute gGrid_ptr_NDHW - // 2. directly assign to gGrid_ptr_NDHW[0], gGrid_ptr_NDHW[1], gGrid_ptr_NDHW[2] - scalar_t *gGrid_ptr_NDHW = grad_grid.data + index * gGrid_sW; - gGrid_ptr_NDHW[0] = gix_mult * gix; - gGrid_ptr_NDHW[1] = giy_mult * giy; - gGrid_ptr_NDHW[2] = giz_mult * giz; - } else if (interpolation_mode == GridSamplerInterpolation::Nearest) { - int ix_nearest = static_cast(::round(ix)); - int iy_nearest = static_cast(::round(iy)); - int iz_nearest = static_cast(::round(iz)); - - // assign nearest neighor pixel value to output pixel - scalar_t *gOut_ptr_NCDHW = grad_output.data + n * gOut_sN + d * gOut_sD + h * gOut_sH + w * gOut_sW; - scalar_t *gInp_ptr_NC = grad_input.data + n * gInp_sN; - for (int c = 0; c < C; ++c, gOut_ptr_NCDHW += gOut_sC, gInp_ptr_NC += gInp_sC) { - // calculate and set grad_input - safe_add_3d(gInp_ptr_NC, iz_nearest, iy_nearest, ix_nearest, - gInp_sD, gInp_sH, gInp_sW, inp_D, inp_H, inp_W, *gOut_ptr_NCDHW); - } - - // assuming grad_grid is contiguous - // thus we can - // 1. use index with gGrid_sW to directly compute gGrid_ptr_NDHW - // 2. directly assign to gGrid_ptr_NDHW[0], gGrid_ptr_NDHW[1], gGrid_ptr_NDHW[2] - scalar_t *gGrid_ptr_NDHW = grad_grid.data + index * gGrid_sW; - gGrid_ptr_NDHW[0] = static_cast(0); - gGrid_ptr_NDHW[1] = static_cast(0); - gGrid_ptr_NDHW[2] = static_cast(0); - } - } - } -} // namespace - -using namespace at; -// No shape checking needed here. See # NOTE [ grid_sampler Native Functions ]. -Tensor grid_sampler_2d_forward_cuda(const Tensor& input, const Tensor& grid, - int64_t interpolation_mode, int64_t padding_mode, - bool align_corners) { - auto N = input.size(0); - auto H = grid.size(1); - auto W = grid.size(2); - auto output = at::empty({N, input.size(1), H, W}, input.options()); - int count = static_cast(N * H * W); - if (count > 0) { - AT_DISPATCH_FLOATING_TYPES_AND_HALF(input.scalar_type(), "grid_sampler_2d_forward_cuda", [&] { - grid_sampler_2d_forward_kernel_cuda - <<>>( - count, - getTensorInfo(input), - getTensorInfo(grid), - getTensorInfo(output), - static_cast(interpolation_mode), - static_cast(padding_mode), - align_corners); - }); - } - return output; -} - -// No shape checking needed here. See # NOTE [ grid_sampler Native Functions ]. 
-Tensor grid_sampler_3d_forward_cuda(const Tensor& input, const Tensor& grid, - int64_t interpolation_mode, int64_t padding_mode, - bool align_corners) { - auto N = input.size(0); - auto D = grid.size(1); - auto H = grid.size(2); - auto W = grid.size(3); - auto output = at::empty({N, input.size(1), D, H, W}, input.options()); - int count = static_cast(N * D * H * W); - if (count > 0) { - AT_DISPATCH_FLOATING_TYPES_AND_HALF(input.scalar_type(), "grid_sampler_3d_forward_cuda", [&] { - grid_sampler_3d_forward_kernel_cuda - <<>>( - count, - getTensorInfo(input), - getTensorInfo(grid), - getTensorInfo(output), - static_cast(interpolation_mode), - static_cast(padding_mode), - align_corners); - }); - } - return output; -} - -// No shape checking needed here. See # NOTE [ grid_sampler Native Functions ]. -std::tuple -grid_sampler_2d_backward_cuda(const Tensor& grad_output, const Tensor& input, - const Tensor& grid, int64_t interpolation_mode, - int64_t padding_mode, bool align_corners) { - auto N = input.size(0); - auto H = grid.size(1); - auto W = grid.size(2); - auto grad_input = at::zeros_like(input); - auto grad_grid = at::empty_like(grid); - int count = static_cast(N * H * W); - if (count > 0) { - AT_DISPATCH_FLOATING_TYPES_AND_HALF(input.scalar_type(), "grid_sampler_2d_backward_cuda", [&] { - grid_sampler_2d_backward_kernel_cuda - <<>>( - count, - getTensorInfo(grad_output), - getTensorInfo(input), - getTensorInfo(grid), - getTensorInfo(grad_input), - getTensorInfo(grad_grid), - static_cast(interpolation_mode), - static_cast(padding_mode), - align_corners); - }); - } - return std::make_tuple(grad_input, grad_grid); -} - -// No shape checking needed here. See # NOTE [ grid_sampler Native Functions ]. -std::tuple -grid_sampler_3d_backward_cuda(const Tensor& grad_output, const Tensor& input, - const Tensor& grid, int64_t interpolation_mode, int64_t padding_mode, - bool align_corners) { - auto N = input.size(0); - auto D = grid.size(1); - auto H = grid.size(2); - auto W = grid.size(3); - auto grad_input = at::zeros_like(input); - auto grad_grid = at::empty_like(grid); - int count = static_cast(N * D * H * W); - if (count > 0) { - AT_DISPATCH_FLOATING_TYPES_AND_HALF(input.scalar_type(), "grid_sampler_3d_backward_cuda", [&] { - grid_sampler_3d_backward_kernel_cuda - <<>>( - count, - getTensorInfo(grad_output), - getTensorInfo(input), - getTensorInfo(grid), - getTensorInfo(grad_input), - getTensorInfo(grad_grid), - static_cast(interpolation_mode), - static_cast(padding_mode), - align_corners); - }); - } - return std::make_tuple(grad_input, grad_grid); -} - -} // namespace mmdetection diff --git a/mmdet/ops/grid_sampler/src/cuda/grid_sampler_cuda.cuh b/mmdet/ops/grid_sampler/src/cuda/grid_sampler_cuda.cuh deleted file mode 100644 index a84fa7c076e..00000000000 --- a/mmdet/ops/grid_sampler/src/cuda/grid_sampler_cuda.cuh +++ /dev/null @@ -1,233 +0,0 @@ -// Modified from https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/native/cuda/GridSampler.cuh - -#include -#include -#include -#include - -namespace mmdetection { - -namespace detail { - - enum class GridSamplerInterpolation {Bilinear, Nearest}; - enum class GridSamplerPadding {Zeros, Border, Reflection}; - -} // namespace detail - -using detail::GridSamplerInterpolation; -using detail::GridSamplerPadding; - -// Unnormalizes a coordinate from the -1 to +1 scale to its pixel index value, -// where we view each pixel as an area between (idx - 0.5) and (idx + 0.5). 
-// if align_corners: -1 and +1 get sent to the centers of the corner pixels -// -1 --> 0 -// +1 --> (size - 1) -// scale_factor = (size - 1) / 2 -// if not align_corners: -1 and +1 get sent to the image edges -// -1 --> -0.5 -// +1 --> (size - 1) + 0.5 == size - 0.5 -// scale_factor = size / 2 -template -static __forceinline__ __device__ -scalar_t grid_sampler_unnormalize(scalar_t coord, int size, bool align_corners) { - if (align_corners) { - // unnormalize coord from [-1, 1] to [0, size - 1] - return ((coord + 1.f) / 2) * (size - 1); - } else { - // unnormalize coord from [-1, 1] to [-0.5, size - 0.5] - return ((coord + 1.f) * size - 1) / 2; - } -} - -// grid_sampler_unnormalize_set_grad works the same as grid_sampler_unnormalize -// except that it also returns the `d output / d input` via pointer argument -// `grad_in`. -// This is useful in the backward pass of grid_sampler. -template -static __forceinline__ __device__ -scalar_t grid_sampler_unnormalize_set_grad(scalar_t coord, int size, - bool align_corners, scalar_t *grad_in) { - if (align_corners) { - // unnormalize coord from [-1, 1] to [0, size - 1] - *grad_in = static_cast(size - 1) / 2; - return ((coord + 1.f) / 2) * (size - 1); - } else { - // unnormalize coord from [-1, 1] to [-0.5, size - 0.5] - *grad_in = static_cast(size) / 2; - return ((coord + 1.f) * size - 1) / 2; - } -} - -// Clips coordinates to between 0 and clip_limit - 1 -template -static __forceinline__ __device__ -scalar_t clip_coordinates(scalar_t in, int clip_limit) { - return ::min(static_cast(clip_limit - 1), ::max(in, static_cast(0))); -} - -// clip_coordinates_set_grad works similarly to clip_coordinates except that -// it also returns the `d output / d input` via pointer argument `grad_in`. -// This is useful in the backward pass of grid_sampler. -template -static __forceinline__ __device__ -scalar_t clip_coordinates_set_grad(scalar_t in, int clip_limit, scalar_t *grad_in) { - if (in < static_cast(0)) { - *grad_in = static_cast(0); - return static_cast(0); - } else { - scalar_t max = static_cast(clip_limit - 1); - if (in > max) { - *grad_in = static_cast(0); - return max; - } else { - *grad_in = static_cast(1); - return in; - } - } -} - -// Reflects coordinates until they fall between low and high (inclusive). -// The bounds are passed as twice their value so that half-integer values -// can be represented as ints. -template -static __forceinline__ __device__ -scalar_t reflect_coordinates(scalar_t in, int twice_low, int twice_high) { - if (twice_low == twice_high) { - return static_cast(0); - } - scalar_t min = static_cast(twice_low) / 2; - scalar_t span = static_cast(twice_high - twice_low) / 2; - in = ::fabs(in - min); - // `fmod` returns same sign as `in`, which is positive after the `fabs` above. - scalar_t extra = ::fmod(in, span); - int flips = static_cast(::floor(in / span)); - if (flips % 2 == 0) { - return extra + min; - } else { - return span - extra + min; - } -} - -// reflect_coordinates_set_grad works similarly to reflect_coordinates except -// that it also returns the `d output / d input` via pointer argument -// `grad_in`. -// This is useful in the backward pass of grid_sampler. 
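// Illustrative aside (editor's sketch, not part of the patch):
// grid_sampler_unnormalize above maps a normalized coordinate in [-1, 1] onto
// pixel indices; with align_corners=True the endpoints land on the corner
// pixel centers, without it they land on the image edges. A host-side sketch
// of both conventions.
#include <cstdio>

static double unnormalize(double coord, int size, bool align_corners) {
  return align_corners ? ((coord + 1.0) / 2.0) * (size - 1)
                       : ((coord + 1.0) * size - 1.0) / 2.0;
}

int main() {
  const int size = 4;
  // align_corners=True : -1 -> 0.0,  +1 -> 3.0 (size - 1)
  // align_corners=False: -1 -> -0.5, +1 -> 3.5 (size - 0.5)
  std::printf("%.1f %.1f\n", unnormalize(-1, size, true), unnormalize(1, size, true));
  std::printf("%.1f %.1f\n", unnormalize(-1, size, false), unnormalize(1, size, false));
  return 0;
}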
-template -static __forceinline__ __device__ -scalar_t reflect_coordinates_set_grad(scalar_t in, int twice_low, int twice_high, - scalar_t *grad_in) { - if (twice_low == twice_high) { - *grad_in = static_cast(0); - return static_cast(0); - } - int grad_in_mult_; - scalar_t min = static_cast(twice_low) / 2; - scalar_t span = static_cast(twice_high - twice_low) / 2; - in = in - min; - if (in < static_cast(0)) { - grad_in_mult_ = -1; - in = -in; - } else { - grad_in_mult_ = 1; - } - // `fmod` returns same sign as `in`, which is positive after the `if` above. - scalar_t extra = ::fmod(in, span); - int flips = static_cast(::floor(in / span)); - if (flips % 2 == 0) { - *grad_in = static_cast(grad_in_mult_); - return extra + min; - } else { - *grad_in = static_cast(-grad_in_mult_); - return span - extra + min; - } -} - -// Computes the pixel source index value for a grid coordinate -template -static __forceinline__ __device__ -scalar_t grid_sampler_compute_source_index( - scalar_t coord, - int size, - GridSamplerPadding padding_mode, - bool align_corners) { - coord = grid_sampler_unnormalize(coord, size, align_corners); - if (padding_mode == GridSamplerPadding::Border) { - // clip coordinates to image borders - coord = clip_coordinates(coord, size); - } else if (padding_mode == GridSamplerPadding::Reflection) { - // reflect coordinates by image borders - if (align_corners) { - coord = reflect_coordinates(coord, 0, 2*(size - 1)); - } else { - coord = reflect_coordinates(coord, -1, 2*size - 1); - // when align_corners=False, reflection does not auto clip coords - coord = clip_coordinates(coord, size); - } - } - return coord; -} - -// grid_sampler_compute_source_index_set_grad works similarly to -// grid_sampler_compute_source_index except that it also returns the -// `d output / d input` via pointer argument `grad_in`. -// This is useful in the backward pass of grid_sampler. 
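// Illustrative aside (editor's sketch, not part of the patch): each *_set_grad
// helper reports its local derivative through grad_in, and
// grid_sampler_compute_source_index_set_grad multiplies them together by the
// chain rule. A host-side sketch for border padding, with hypothetical values.
#include <cstdio>

static double unnormalize_set_grad(double coord, int size, bool align_corners,
                                   double* grad_in) {
  if (align_corners) {
    *grad_in = (size - 1) / 2.0;
    return ((coord + 1.0) / 2.0) * (size - 1);
  }
  *grad_in = size / 2.0;
  return ((coord + 1.0) * size - 1.0) / 2.0;
}

static double clip_set_grad(double in, int clip_limit, double* grad_in) {
  if (in < 0.0) { *grad_in = 0.0; return 0.0; }
  double max = clip_limit - 1.0;
  if (in > max) { *grad_in = 0.0; return max; }
  *grad_in = 1.0;
  return in;
}

int main() {
  double g_unnorm, g_clip;
  double idx = unnormalize_set_grad(0.5, 8, /*align_corners=*/false, &g_unnorm);
  idx = clip_set_grad(idx, 8, &g_clip);
  // d(source index) / d(normalized coord) is the product of the stage grads;
  // it becomes zero once the coordinate is clamped at the border.
  std::printf("index=%.2f  dindex/dcoord=%.2f\n", idx, g_unnorm * g_clip);
  return 0;
}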
-template -static __forceinline__ __device__ -scalar_t grid_sampler_compute_source_index_set_grad( - scalar_t coord, - int size, - GridSamplerPadding padding_mode, - bool align_corners, - scalar_t *grad_in) { - scalar_t grad_clip, grad_refl; - coord = grid_sampler_unnormalize_set_grad(coord, size, align_corners, grad_in); - if (padding_mode == GridSamplerPadding::Border) { - // clip coordinates to image borders - coord = clip_coordinates_set_grad(coord, size, &grad_clip); - *grad_in = (*grad_in) * grad_clip; - } else if (padding_mode == GridSamplerPadding::Reflection) { - // reflect coordinates by image borders - if (align_corners) { - coord = reflect_coordinates_set_grad(coord, 0, 2*(size - 1), &grad_refl); - *grad_in = (*grad_in) * grad_refl; - } else { - coord = reflect_coordinates_set_grad(coord, -1, 2*size - 1, &grad_refl); - // when align_corners=False, reflection does not auto clip coords - coord = clip_coordinates_set_grad(coord, size, &grad_clip); - *grad_in = (*grad_in) * grad_refl * grad_clip; - } - } - return coord; -} - -static __forceinline__ __device__ -bool within_bounds_2d(int h, int w, int H, int W) { - return h >= 0 && h < H && w >= 0 && w < W; -} - -static __forceinline__ __device__ -bool within_bounds_3d(int d, int h, int w, int D, int H, int W) { - return d >= 0 && d < D && h >= 0 && h < H && w >= 0 && w < W; -} - -template -static __forceinline__ __device__ -void safe_add_2d(scalar_t *data, int h, int w, - int sH, int sW, int H, int W, - scalar_t delta) { - if (within_bounds_2d(h, w, H, W)) { - atomicAdd(data + h * sH + w * sW, delta); - } -} - -template -static __forceinline__ __device__ -void safe_add_3d(scalar_t *data, int d, int h, int w, - int sD, int sH, int sW, int D, int H, int W, - scalar_t delta) { - if (within_bounds_3d(d, h, w, D, H, W)) { - atomicAdd(data + d * sD + h * sH + w * sW, delta); - } -} - -} // namespace at::mmdetection diff --git a/mmdet/ops/grid_sampler/src/grid_sampler_ext.cpp b/mmdet/ops/grid_sampler/src/grid_sampler_ext.cpp deleted file mode 100644 index 7e76a7aab80..00000000000 --- a/mmdet/ops/grid_sampler/src/grid_sampler_ext.cpp +++ /dev/null @@ -1,117 +0,0 @@ -#include -#include - -namespace mmdetection { - -using namespace at; - -// No shape checking needed here. See # NOTE [ grid_sampler Native Functions ]. -Tensor grid_sampler_2d_forward_cpu(const Tensor& input, const Tensor& grid, - int64_t interpolation_mode, int64_t padding_mode, - bool align_corners); - -// No shape checking needed here. See # NOTE [ grid_sampler Native Functions ]. -Tensor grid_sampler_3d_forward_cpu(const Tensor& input, const Tensor& grid, - int64_t interpolation_mode, int64_t padding_mode, - bool align_corners); - -// No shape checking needed here. See # NOTE [ grid_sampler Native Functions ]. -std::tuple -grid_sampler_2d_backward_cpu(const Tensor& grad_output, const Tensor& input, - const Tensor& grid, int64_t interpolation_mode, - int64_t padding_mode, bool align_corners); - -// No shape checking needed here. See # NOTE [ grid_sampler Native Functions ]. -std::tuple -grid_sampler_3d_backward_cpu(const Tensor& grad_output, const Tensor& input, - const Tensor& grid, int64_t interpolation_mode, int64_t padding_mode, - bool align_corners); - -#ifdef WITH_CUDA -// No shape checking needed here. See # NOTE [ grid_sampler Native Functions ]. -Tensor grid_sampler_2d_forward_cuda(const Tensor& input, const Tensor& grid, - int64_t interpolation_mode, int64_t padding_mode, - bool align_corners); - -// No shape checking needed here. 
See # NOTE [ grid_sampler Native Functions ].
-Tensor grid_sampler_3d_forward_cuda(const Tensor& input, const Tensor& grid,
-                                    int64_t interpolation_mode, int64_t padding_mode,
-                                    bool align_corners);
-
-// No shape checking needed here. See # NOTE [ grid_sampler Native Functions ].
-std::tuple
-grid_sampler_2d_backward_cuda(const Tensor& grad_output, const Tensor& input,
-                              const Tensor& grid, int64_t interpolation_mode,
-                              int64_t padding_mode, bool align_corners);
-
-// No shape checking needed here. See # NOTE [ grid_sampler Native Functions ].
-std::tuple
-grid_sampler_3d_backward_cuda(const Tensor& grad_output, const Tensor& input,
-                              const Tensor& grid, int64_t interpolation_mode, int64_t padding_mode,
-                              bool align_corners);
-#endif
-
-// No shape checking needed here. See # NOTE [ grid_sampler Native Functions ].
-Tensor grid_sampler_forward(const Tensor& input, const Tensor& grid,
-                            int64_t interpolation_mode, int64_t padding_mode,
-                            bool align_corners) {
-  if (input.dim() == 4) {
-    if (input.type().is_cuda()) {
-#ifdef WITH_CUDA
-      return grid_sampler_2d_forward_cuda(input, grid, interpolation_mode,
-                                          padding_mode, align_corners);
-#else
-      AT_ERROR("grid_sampler is not compiled with GPU support");
-#endif
-    }
-    return grid_sampler_2d_forward_cpu(input, grid, interpolation_mode,
-                                       padding_mode, align_corners);
-  } else {
-    if (input.type().is_cuda()) {
-#ifdef WITH_CUDA
-      return grid_sampler_3d_forward_cuda(input, grid, interpolation_mode,
-                                          padding_mode, align_corners);
-#else
-      AT_ERROR("grid_sampler is not compiled with GPU support");
-#endif
-    }
-    return grid_sampler_3d_forward_cpu(input, grid, interpolation_mode,
-                                       padding_mode, align_corners);
-  }
-}
-
-std::tuple
-grid_sampler_backward(const Tensor& grad_output, const Tensor& input,
-                      const Tensor& grid, int64_t interpolation_mode,
-                      int64_t padding_mode, bool align_corners) {
-  if (input.dim() == 4) {
-    if (input.type().is_cuda()) {
-#ifdef WITH_CUDA
-      return grid_sampler_2d_backward_cuda(grad_output, input, grid,
-                                           interpolation_mode, padding_mode, align_corners);
-#else
-      AT_ERROR("grid_sampler is not compiled with GPU support");
-#endif
-    }
-    return grid_sampler_2d_backward_cpu(grad_output, input, grid,
-                                        interpolation_mode, padding_mode, align_corners);
-  } else {
-    if (input.type().is_cuda()) {
-#ifdef WITH_CUDA
-      return grid_sampler_3d_backward_cuda(grad_output, input, grid,
-                                           interpolation_mode, padding_mode, align_corners);
-#else
-      AT_ERROR("grid_sampler is not compiled with GPU support");
-#endif
-    }
-    return grid_sampler_3d_backward_cpu(grad_output, input, grid,
-                                        interpolation_mode, padding_mode, align_corners);
-  }
-}
-
-PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
-  m.def("grid_sampler_forward_cuda", &grid_sampler_forward, "grid_sampler_forward");
-  m.def("grid_sampler_backward_cuda", &grid_sampler_backward, "grid_sampler_backward");
-}
-
-} // namespace mmdetection
diff --git a/mmdet/ops/masked_conv/src/cuda/masked_conv2d_cuda.cpp b/mmdet/ops/masked_conv/src/cuda/masked_conv2d_cuda.cpp
index b2850d916a4..84bd7c27913 100644
--- a/mmdet/ops/masked_conv/src/cuda/masked_conv2d_cuda.cpp
+++ b/mmdet/ops/masked_conv/src/cuda/masked_conv2d_cuda.cpp
@@ -17,9 +17,9 @@ int MaskedCol2imForwardLaucher(const at::Tensor col, const int height,
                                const at::Tensor mask_w_idx, const int mask_cnt,
                                at::Tensor im);
 
-#define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ")
+#define CHECK_CUDA(x) TORCH_CHECK(x.device().is_cuda(), #x, " must be a CUDAtensor ")
 #define CHECK_CONTIGUOUS(x) \
-  AT_CHECK(x.is_contiguous(), #x, " must be contiguous ")
+  TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ")
 #define CHECK_INPUT(x) \
   CHECK_CUDA(x);       \
   CHECK_CONTIGUOUS(x)
diff --git a/mmdet/ops/masked_conv/src/cuda/masked_conv2d_kernel.cu b/mmdet/ops/masked_conv/src/cuda/masked_conv2d_kernel.cu
index 81c785bbe41..b8323592f52 100644
--- a/mmdet/ops/masked_conv/src/cuda/masked_conv2d_kernel.cu
+++ b/mmdet/ops/masked_conv/src/cuda/masked_conv2d_kernel.cu
@@ -59,10 +59,10 @@ int MaskedIm2colForwardLaucher(const at::Tensor bottom_data, const int height,
   AT_DISPATCH_FLOATING_TYPES_AND_HALF(
       bottom_data.scalar_type(), "MaskedIm2colLaucherForward", ([&] {
-        const scalar_t *bottom_data_ = bottom_data.data();
-        const int64_t *mask_h_idx_ = mask_h_idx.data();
-        const int64_t *mask_w_idx_ = mask_w_idx.data();
-        scalar_t *top_data_ = top_data.data();
+        const scalar_t *bottom_data_ = bottom_data.data_ptr();
+        const int64_t *mask_h_idx_ = mask_h_idx.data_ptr();
+        const int64_t *mask_w_idx_ = mask_w_idx.data_ptr();
+        scalar_t *top_data_ = top_data.data_ptr();
         MaskedIm2colForward
             <<>>(
@@ -99,10 +99,10 @@ int MaskedCol2imForwardLaucher(const at::Tensor bottom_data, const int height,
   AT_DISPATCH_FLOATING_TYPES_AND_HALF(
       bottom_data.scalar_type(), "MaskedCol2imLaucherForward", ([&] {
-        const scalar_t *bottom_data_ = bottom_data.data();
-        const int64_t *mask_h_idx_ = mask_h_idx.data();
-        const int64_t *mask_w_idx_ = mask_w_idx.data();
-        scalar_t *top_data_ = top_data.data();
+        const scalar_t *bottom_data_ = bottom_data.data_ptr();
+        const int64_t *mask_h_idx_ = mask_h_idx.data_ptr();
+        const int64_t *mask_w_idx_ = mask_w_idx.data_ptr();
+        scalar_t *top_data_ = top_data.data_ptr();
         MaskedCol2imForward
             <<>>(
diff --git a/mmdet/ops/masked_conv/src/masked_conv2d_ext.cpp b/mmdet/ops/masked_conv/src/masked_conv2d_ext.cpp
index 5bf60be580e..39058ad7755 100644
--- a/mmdet/ops/masked_conv/src/masked_conv2d_ext.cpp
+++ b/mmdet/ops/masked_conv/src/masked_conv2d_ext.cpp
@@ -19,7 +19,7 @@ int masked_im2col_forward(const at::Tensor im, const at::Tensor mask_h_idx,
                           const at::Tensor mask_w_idx, const int kernel_h,
                           const int kernel_w, const int pad_h, const int pad_w,
                           at::Tensor col) {
-  if (im.type().is_cuda()) {
+  if (im.device().is_cuda()) {
 #ifdef WITH_CUDA
     return masked_im2col_forward_cuda(im, mask_h_idx, mask_w_idx, kernel_h,
                                       kernel_w, pad_h, pad_w, col);
@@ -34,7 +34,7 @@ int masked_col2im_forward(const at::Tensor col, const at::Tensor mask_h_idx,
                           const at::Tensor mask_w_idx, int height, int width,
                           int channels, at::Tensor im) {
-  if (col.type().is_cuda()) {
+  if (col.device().is_cuda()) {
 #ifdef WITH_CUDA
     return masked_col2im_forward_cuda(col, mask_h_idx, mask_w_idx, height,
                                       width, channels, im);
diff --git a/mmdet/ops/nms/src/cpu/nms_cpu.cpp b/mmdet/ops/nms/src/cpu/nms_cpu.cpp
index 4d11abec7e6..aa652ea396c 100644
--- a/mmdet/ops/nms/src/cpu/nms_cpu.cpp
+++ b/mmdet/ops/nms/src/cpu/nms_cpu.cpp
@@ -6,7 +6,7 @@ template
 at::Tensor nms_cpu_kernel(const at::Tensor& dets, const float threshold) {
-  AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor");
+  AT_ASSERTM(!dets.device().is_cuda(), "dets must be a CPU tensor");
 
   if (dets.numel() == 0) {
     return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));
@@ -26,13 +26,13 @@ at::Tensor nms_cpu_kernel(const at::Tensor& dets, const float threshold) {
   at::Tensor suppressed_t =
       at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU));
 
-  auto suppressed = suppressed_t.data();
-  auto order = order_t.data();
-  auto x1 = x1_t.data();
-  auto y1 = y1_t.data();
-  auto x2 = x2_t.data();
-  auto y2 = y2_t.data();
-  auto areas = areas_t.data();
+  auto suppressed = suppressed_t.data_ptr();
+  auto order = order_t.data_ptr();
+  auto x1 = x1_t.data_ptr();
+  auto y1 = y1_t.data_ptr();
+  auto x2 = x2_t.data_ptr();
+  auto y2 = y2_t.data_ptr();
+  auto areas = areas_t.data_ptr();
 
   for (int64_t _i = 0; _i < ndets; _i++) {
     auto i = order[_i];
@@ -73,7 +73,7 @@ template
 at::Tensor soft_nms_cpu_kernel(const at::Tensor& dets, const float threshold,
                                const unsigned char method, const float sigma,
                                const float min_score) {
-  AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor");
+  AT_ASSERTM(!dets.device().is_cuda(), "dets must be a CPU tensor");
 
   if (dets.numel() == 0) {
     return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));
@@ -88,16 +88,16 @@ at::Tensor soft_nms_cpu_kernel(const at::Tensor& dets, const float threshold,
   at::Tensor areas_t = (x2_t - x1_t) * (y2_t - y1_t);
 
   auto ndets = dets.size(0);
-  auto x1 = x1_t.data();
-  auto y1 = y1_t.data();
-  auto x2 = x2_t.data();
-  auto y2 = y2_t.data();
-  auto scores = scores_t.data();
-  auto areas = areas_t.data();
+  auto x1 = x1_t.data_ptr();
+  auto y1 = y1_t.data_ptr();
+  auto x2 = x2_t.data_ptr();
+  auto y2 = y2_t.data_ptr();
+  auto scores = scores_t.data_ptr();
+  auto areas = areas_t.data_ptr();
 
   int64_t pos = 0;
   at::Tensor inds_t = at::arange(ndets, dets.options());
-  auto inds = inds_t.data();
+  auto inds = inds_t.data_ptr();
 
   for (int64_t i = 0; i < ndets; i++) {
     auto max_score = scores[i];
diff --git a/mmdet/ops/nms/src/cuda/nms_cuda.cpp b/mmdet/ops/nms/src/cuda/nms_cuda.cpp
index 61ca93a273c..d46b8166904 100644
--- a/mmdet/ops/nms/src/cuda/nms_cuda.cpp
+++ b/mmdet/ops/nms/src/cuda/nms_cuda.cpp
@@ -1,7 +1,7 @@
 // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 #include
 
-#define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ")
+#define CHECK_CUDA(x) TORCH_CHECK(x.device().is_cuda(), #x, " must be a CUDAtensor ")
 
 at::Tensor nms_cuda_forward(const at::Tensor boxes, float nms_overlap_thresh);
diff --git a/mmdet/ops/nms/src/cuda/nms_kernel.cu b/mmdet/ops/nms/src/cuda/nms_kernel.cu
index 4a0800f5207..bb6d18abcfa 100644
--- a/mmdet/ops/nms/src/cuda/nms_kernel.cu
+++ b/mmdet/ops/nms/src/cuda/nms_kernel.cu
@@ -74,7 +74,7 @@ at::Tensor nms_cuda_forward(const at::Tensor boxes, float nms_overlap_thresh) {
   at::DeviceGuard guard(boxes.device());
 
   using scalar_t = float;
-  AT_ASSERTM(boxes.type().is_cuda(), "boxes must be a CUDA tensor");
+  AT_ASSERTM(boxes.device().is_cuda(), "boxes must be a CUDA tensor");
   auto scores = boxes.select(1, 4);
   auto order_t = std::get<1>(scores.sort(0, /* descending=*/true));
   auto boxes_sorted = boxes.index_select(0, order_t);
@@ -83,7 +83,7 @@ at::Tensor nms_cuda_forward(const at::Tensor boxes, float nms_overlap_thresh) {
 
   const int col_blocks = THCCeilDiv(boxes_num, threadsPerBlock);
 
-  scalar_t* boxes_dev = boxes_sorted.data();
+  scalar_t* boxes_dev = boxes_sorted.data_ptr();
 
   THCState *state = at::globalContext().lazyInitCUDA(); // TODO replace with getTHCState
@@ -114,7 +114,7 @@ at::Tensor nms_cuda_forward(const at::Tensor boxes, float nms_overlap_thresh) {
   memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);
 
   at::Tensor keep =
       at::empty({boxes_num}, boxes.options().dtype(at::kLong).device(at::kCPU));
-  int64_t* keep_out = keep.data();
+  int64_t* keep_out = keep.data_ptr();
 
   int num_to_keep = 0;
   for (int i = 0; i < boxes_num; i++) {
diff --git a/mmdet/ops/nms/src/nms_ext.cpp b/mmdet/ops/nms/src/nms_ext.cpp
index 6d95303a315..6c311f2652d 100644
--- a/mmdet/ops/nms/src/nms_ext.cpp
+++ b/mmdet/ops/nms/src/nms_ext.cpp
@@ -13,7 +13,7 @@ at::Tensor nms_cuda(const at::Tensor& dets, const float threshold);
 #endif
 
 at::Tensor nms(const at::Tensor& dets, const float threshold){
-  if (dets.type().is_cuda()) {
+  if (dets.device().is_cuda()) {
 #ifdef WITH_CUDA
     return nms_cuda(dets, threshold);
 #else
@@ -26,7 +26,7 @@ at::Tensor nms(const at::Tensor& dets, const float threshold){
 at::Tensor soft_nms(const at::Tensor& dets, const float threshold,
                     const unsigned char method, const float sigma,
                     const float min_score) {
-  if (dets.type().is_cuda()) {
+  if (dets.device().is_cuda()) {
     AT_ERROR("soft_nms is not implemented on GPU");
   }
   return soft_nms_cpu(dets, threshold, method, sigma, min_score);
diff --git a/mmdet/ops/roi_align/src/cpu/roi_align_v2.cpp b/mmdet/ops/roi_align/src/cpu/roi_align_v2.cpp
index 2c6b557da24..9e01fe17da0 100644
--- a/mmdet/ops/roi_align/src/cpu/roi_align_v2.cpp
+++ b/mmdet/ops/roi_align/src/cpu/roi_align_v2.cpp
@@ -357,11 +357,11 @@ at::Tensor ROIAlignForwardV2CPULaucher(const at::Tensor& input,
 
   if (output.numel() == 0) return output;
 
-  AT_DISPATCH_FLOATING_TYPES_AND_HALF(input.type(), "ROIAlign_forward", [&] {
+  AT_DISPATCH_FLOATING_TYPES_AND_HALF(input.scalar_type(), "ROIAlign_forward", [&] {
     ROIAlignForward(
-        output_size, input.contiguous().data(), spatial_scale,
+        output_size, input.contiguous().data_ptr(), spatial_scale,
         channels, height, width, pooled_height, pooled_width, sampling_ratio,
-        rois.contiguous().data(), output.data(), aligned);
+        rois.contiguous().data_ptr(), output.data_ptr(), aligned);
   });
   return output;
 }
@@ -393,11 +393,11 @@ at::Tensor ROIAlignBackwardV2CPULaucher(
   int h_stride = grad.stride(2);
   int w_stride = grad.stride(3);
 
-  AT_DISPATCH_FLOATING_TYPES_AND_HALF(grad.type(), "ROIAlign_backward", [&] {
+  AT_DISPATCH_FLOATING_TYPES_AND_HALF(grad.scalar_type(), "ROIAlign_backward", [&] {
     ROIAlignBackward(
-        grad.numel(), grad.contiguous().data(), spatial_scale,
+        grad.numel(), grad.contiguous().data_ptr(), spatial_scale,
         channels, height, width, pooled_height, pooled_width, sampling_ratio,
-        grad_input.data(), rois.contiguous().data(),
+        grad_input.data_ptr(), rois.contiguous().data_ptr(),
         n_stride, c_stride, h_stride, w_stride, aligned);
   });
   return grad_input;
diff --git a/mmdet/ops/roi_align/src/cuda/roi_align_kernel.cu b/mmdet/ops/roi_align/src/cuda/roi_align_kernel.cu
index 113fc110475..7afa33229d8 100644
--- a/mmdet/ops/roi_align/src/cuda/roi_align_kernel.cu
+++ b/mmdet/ops/roi_align/src/cuda/roi_align_kernel.cu
@@ -125,9 +125,9 @@ int ROIAlignForwardLaucher(const at::Tensor features, const at::Tensor rois,
   const int output_size = num_rois * pooled_height * pooled_width * channels;
   AT_DISPATCH_FLOATING_TYPES_AND_HALF(
       features.scalar_type(), "ROIAlignLaucherForward", ([&] {
-        const scalar_t *bottom_data = features.data();
-        const scalar_t *rois_data = rois.data();
-        scalar_t *top_data = output.data();
+        const scalar_t *bottom_data = features.data_ptr();
+        const scalar_t *rois_data = rois.data_ptr();
+        scalar_t *top_data = output.data_ptr();
         ROIAlignForwardV1 <<();
-        const scalar_t *rois_data = rois.data();
-        scalar_t *bottom_diff = bottom_grad.data();
+        const scalar_t *top_diff = top_grad.data_ptr();
+        const scalar_t *rois_data = rois.data_ptr();
+        scalar_t *bottom_diff = bottom_grad.data_ptr();
         if (sizeof(scalar_t) == sizeof(double)) {
           fprintf(stderr, "double is not supported\n");
           exit(-1);
diff --git a/mmdet/ops/roi_align/src/cuda/roi_align_kernel_v2.cu b/mmdet/ops/roi_align/src/cuda/roi_align_kernel_v2.cu
index 9a2f7150933..0189323cd1e 100644
--- a/mmdet/ops/roi_align/src/cuda/roi_align_kernel_v2.cu
+++ b/mmdet/ops/roi_align/src/cuda/roi_align_kernel_v2.cu
@@ -297,9 +297,9 @@ at::Tensor ROIAlignForwardV2Laucher(const at::Tensor& input,
   AT_DISPATCH_FLOATING_TYPES(input.scalar_type(), "ROIAlign_forward", [&] {
     RoIAlignForwardV2<<>>(
-        output_size, input.contiguous().data(), spatial_scale,
+        output_size, input.contiguous().data_ptr(), spatial_scale,
         channels, height, width, pooled_height, pooled_width, sampling_ratio,
-        rois.contiguous().data(), output.data(), aligned);
+        rois.contiguous().data_ptr(), output.data_ptr(), aligned);
   });
   cudaDeviceSynchronize();
   AT_CUDA_CHECK(cudaGetLastError());
@@ -338,10 +338,10 @@ at::Tensor ROIAlignBackwardV2Laucher(
   AT_DISPATCH_FLOATING_TYPES(grad.scalar_type(), "ROIAlign_backward", [&] {
     RoIAlignBackwardFeatureV2<<>>(
-        grad.numel(), grad.contiguous().data(), num_rois,
+        grad.numel(), grad.contiguous().data_ptr(), num_rois,
         spatial_scale, channels, height, width, pooled_height, pooled_width,
-        sampling_ratio, grad_input.data(),
-        rois.contiguous().data(), aligned);
+        sampling_ratio, grad_input.data_ptr(),
+        rois.contiguous().data_ptr(), aligned);
   });
   AT_CUDA_CHECK(cudaGetLastError());
   return grad_input;
diff --git a/mmdet/ops/roi_align/src/roi_align_ext.cpp b/mmdet/ops/roi_align/src/roi_align_ext.cpp
index f01351a8f16..18add01bba2 100644
--- a/mmdet/ops/roi_align/src/roi_align_ext.cpp
+++ b/mmdet/ops/roi_align/src/roi_align_ext.cpp
@@ -46,9 +46,9 @@ at::Tensor ROIAlignBackwardV2CPULaucher(
     const int channels, const int height, const int width,
     const int sampling_ratio, bool aligned);
 
-#define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ")
+#define CHECK_CUDA(x) TORCH_CHECK(x.device().is_cuda(), #x, " must be a CUDAtensor ")
 #define CHECK_CONTIGUOUS(x) \
-  AT_CHECK(x.is_contiguous(), #x, " must be contiguous ")
+  TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ")
 #define CHECK_INPUT(x) \
   CHECK_CUDA(x);       \
   CHECK_CONTIGUOUS(x)
@@ -56,7 +56,7 @@ at::Tensor ROIAlignBackwardV2CPULaucher(
 int ROIAlign_forwardV1(at::Tensor features, at::Tensor rois, int pooled_height,
                        int pooled_width, float spatial_scale, int sample_num,
                        at::Tensor output) {
-  if (features.type().is_cuda()) {
+  if (features.device().is_cuda()) {
 #ifdef WITH_CUDA
     CHECK_INPUT(features);
     CHECK_INPUT(rois);
@@ -91,7 +91,7 @@ int ROIAlign_forwardV1(at::Tensor features, at::Tensor rois, int pooled_height,
 int ROIAlign_backwardV1(at::Tensor top_grad, at::Tensor rois, int pooled_height,
                         int pooled_width, float spatial_scale, int sample_num,
                         at::Tensor bottom_grad) {
-  if (top_grad.type().is_cuda()) {
+  if (top_grad.device().is_cuda()) {
 #ifdef WITH_CUDA
     CHECK_INPUT(top_grad);
     CHECK_INPUT(rois);
@@ -129,7 +129,7 @@ inline at::Tensor ROIAlign_forwardV2(const at::Tensor& input,
                                      const int pooled_height,
                                      const int pooled_width,
                                      const int sampling_ratio, bool aligned) {
-  if (input.type().is_cuda()) {
+  if (input.device().is_cuda()) {
 #ifdef WITH_CUDA
     return ROIAlignForwardV2Laucher(input, rois, spatial_scale, pooled_height,
                                     pooled_width, sampling_ratio, aligned);
@@ -146,7 +146,7 @@ inline at::Tensor ROIAlign_backwardV2(
     const int pooled_height, const int pooled_width, const int batch_size,
     const int channels, const int height, const int width,
     const int sampling_ratio, bool aligned) {
-  if (grad.type().is_cuda()) {
+  if (grad.device().is_cuda()) {
 #ifdef WITH_CUDA
     return ROIAlignBackwardV2Laucher(grad, rois, spatial_scale, pooled_height,
                                      pooled_width, batch_size, channels, height,
diff --git a/mmdet/ops/roi_pool/src/cuda/roi_pool_kernel.cu b/mmdet/ops/roi_pool/src/cuda/roi_pool_kernel.cu
index 2e34ff0a10f..88fab97fbb4 100644
--- a/mmdet/ops/roi_pool/src/cuda/roi_pool_kernel.cu
+++ b/mmdet/ops/roi_pool/src/cuda/roi_pool_kernel.cu
@@ -88,10 +88,10 @@ int ROIPoolForwardLaucher(const at::Tensor features, const at::Tensor rois,
   AT_DISPATCH_FLOATING_TYPES_AND_HALF(
       features.scalar_type(), "ROIPoolLaucherForward", ([&] {
-        const scalar_t *bottom_data = features.data();
-        const scalar_t *rois_data = rois.data();
-        scalar_t *top_data = output.data();
-        int *argmax_data = argmax.data();
+        const scalar_t *bottom_data = features.data_ptr();
+        const scalar_t *rois_data = rois.data_ptr();
+        scalar_t *top_data = output.data_ptr();
+        int *argmax_data = argmax.data_ptr();
         ROIPoolForward<<>>(
@@ -132,10 +132,10 @@ int ROIPoolBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois,
   const int output_size = num_rois * pooled_h * pooled_w * channels;
   AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      top_grad.scalar_type(), "ROIPoolLaucherBackward", ([&] {
-        const scalar_t *top_diff = top_grad.data();
-        const scalar_t *rois_data = rois.data();
-        const int *argmax_data = argmax.data();
-        scalar_t *bottom_diff = bottom_grad.data();
+        const scalar_t *top_diff = top_grad.data_ptr();
+        const scalar_t *rois_data = rois.data_ptr();
+        const int *argmax_data = argmax.data_ptr();
+        scalar_t *bottom_diff = bottom_grad.data_ptr();
         if (sizeof(scalar_t) == sizeof(double)) {
           fprintf(stderr, "double is not supported\n");
           exit(-1);
diff --git a/mmdet/ops/roi_pool/src/roi_pool_ext.cpp b/mmdet/ops/roi_pool/src/roi_pool_ext.cpp
index af7bd8553c3..27d6b8a5d07 100755
--- a/mmdet/ops/roi_pool/src/roi_pool_ext.cpp
+++ b/mmdet/ops/roi_pool/src/roi_pool_ext.cpp
@@ -18,9 +18,9 @@ int ROIPoolBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois,
                            const int pooled_w, at::Tensor bottom_grad);
 #endif
 
-#define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ")
+#define CHECK_CUDA(x) TORCH_CHECK(x.device().is_cuda(), #x, " must be a CUDAtensor ")
 #define CHECK_CONTIGUOUS(x) \
-  AT_CHECK(x.is_contiguous(), #x, " must be contiguous ")
+  TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ")
 #define CHECK_INPUT(x) \
   CHECK_CUDA(x);       \
   CHECK_CONTIGUOUS(x)
@@ -29,7 +29,7 @@ int roi_pooling_forward(at::Tensor features, at::Tensor rois,
                         int pooled_height, int pooled_width,
                         float spatial_scale, at::Tensor output,
                         at::Tensor argmax) {
-  if (features.type().is_cuda()) {
+  if (features.device().is_cuda()) {
 #ifdef WITH_CUDA
     CHECK_INPUT(features);
     CHECK_INPUT(rois);
@@ -64,7 +64,7 @@ int roi_pooling_forward(at::Tensor features, at::Tensor rois,
 int roi_pooling_backward(at::Tensor top_grad, at::Tensor rois,
                          at::Tensor argmax, float spatial_scale,
                          at::Tensor bottom_grad) {
-  if (top_grad.type().is_cuda()) {
+  if (top_grad.device().is_cuda()) {
 #ifdef WITH_CUDA
     CHECK_INPUT(top_grad);
     CHECK_INPUT(rois);
diff --git a/mmdet/ops/sigmoid_focal_loss/src/cuda/sigmoid_focal_loss_cuda.cu b/mmdet/ops/sigmoid_focal_loss/src/cuda/sigmoid_focal_loss_cuda.cu
index 5101a113eff..797dcf355eb 100644
--- a/mmdet/ops/sigmoid_focal_loss/src/cuda/sigmoid_focal_loss_cuda.cu
+++ b/mmdet/ops/sigmoid_focal_loss/src/cuda/sigmoid_focal_loss_cuda.cu
@@ -100,8 +100,8 @@ at::Tensor SigmoidFocalLoss_forward_cuda(const at::Tensor &logits,
                                          const at::Tensor &targets,
                                          const int num_classes,
                                          const float gamma, const float alpha) {
-  AT_ASSERTM(logits.type().is_cuda(), "logits must be a CUDA tensor");
-  AT_ASSERTM(targets.type().is_cuda(), "targets must be a CUDA tensor");
+  AT_ASSERTM(logits.device().is_cuda(), "logits must be a CUDA tensor");
+  AT_ASSERTM(targets.device().is_cuda(), "targets must be a CUDA tensor");
   AT_ASSERTM(logits.dim() == 2, "logits should be NxClass");
 
   const int num_samples = logits.size(0);
@@ -121,9 +121,9 @@ at::Tensor SigmoidFocalLoss_forward_cuda(const at::Tensor &logits,
   AT_DISPATCH_FLOATING_TYPES_AND_HALF(
       logits.scalar_type(), "SigmoidFocalLoss_forward", [&] {
         SigmoidFocalLossForward<<>>(
-            losses_size, logits.contiguous().data(),
-            targets.contiguous().data(), num_classes, gamma, alpha,
-            num_samples, losses.data());
+            losses_size, logits.contiguous().data_ptr(),
+            targets.contiguous().data_ptr(), num_classes, gamma, alpha,
+            num_samples, losses.data_ptr());
       });
   THCudaCheck(cudaGetLastError());
   return losses;
@@ -135,9 +135,9 @@ at::Tensor SigmoidFocalLoss_backward_cuda(const at::Tensor &logits,
                                           const int num_classes,
                                           const float gamma, const float alpha) {
-  AT_ASSERTM(logits.type().is_cuda(), "logits must be a CUDA tensor");
-  AT_ASSERTM(targets.type().is_cuda(), "targets must be a CUDA tensor");
-  AT_ASSERTM(d_losses.type().is_cuda(), "d_losses must be a CUDA tensor");
+  AT_ASSERTM(logits.device().is_cuda(), "logits must be a CUDA tensor");
+  AT_ASSERTM(targets.device().is_cuda(), "targets must be a CUDA tensor");
+  AT_ASSERTM(d_losses.device().is_cuda(), "d_losses must be a CUDA tensor");
 
   AT_ASSERTM(logits.dim() == 2, "logits should be NxClass");
 
@@ -160,10 +160,10 @@ at::Tensor SigmoidFocalLoss_backward_cuda(const at::Tensor &logits,
   AT_DISPATCH_FLOATING_TYPES_AND_HALF(
       logits.scalar_type(), "SigmoidFocalLoss_backward", [&] {
         SigmoidFocalLossBackward<<>>(
-            d_logits_size, logits.contiguous().data(),
-            targets.contiguous().data(),
-            d_losses.contiguous().data(), num_classes, gamma, alpha,
-            num_samples, d_logits.data());
+            d_logits_size, logits.contiguous().data_ptr(),
+            targets.contiguous().data_ptr(),
+            d_losses.contiguous().data_ptr(), num_classes, gamma, alpha,
+            num_samples, d_logits.data_ptr());
       });
 
   THCudaCheck(cudaGetLastError());
diff --git a/mmdet/ops/sigmoid_focal_loss/src/sigmoid_focal_loss_ext.cpp b/mmdet/ops/sigmoid_focal_loss/src/sigmoid_focal_loss_ext.cpp
index faf2e787297..3d66f3f8ff8 100644
--- a/mmdet/ops/sigmoid_focal_loss/src/sigmoid_focal_loss_ext.cpp
+++ b/mmdet/ops/sigmoid_focal_loss/src/sigmoid_focal_loss_ext.cpp
@@ -20,7 +20,7 @@ at::Tensor SigmoidFocalLoss_forward(const at::Tensor &logits,
                                     const at::Tensor &targets,
                                     const int num_classes, const float gamma,
                                     const float alpha) {
-  if (logits.type().is_cuda()) {
+  if (logits.device().is_cuda()) {
 #ifdef WITH_CUDA
     at::DeviceGuard guard(logits.device());
     return SigmoidFocalLoss_forward_cuda(logits, targets, num_classes, gamma,
@@ -37,7 +37,7 @@ at::Tensor SigmoidFocalLoss_backward(const at::Tensor &logits,
                                      const at::Tensor &d_losses,
                                      const int num_classes, const float gamma,
                                      const float alpha) {
-  if (logits.type().is_cuda()) {
+  if (logits.device().is_cuda()) {
 #ifdef WITH_CUDA
     at::DeviceGuard guard(logits.device());
     return SigmoidFocalLoss_backward_cuda(logits, targets, d_losses,
diff --git a/setup.py b/setup.py
index 14af9d1bce6..e70a53110a6 100755
--- a/setup.py
+++ b/setup.py
@@ -282,19 +282,6 @@ def gen_packages_items():
                 'src/cuda/masked_conv2d_cuda.cpp',
                 'src/cuda/masked_conv2d_kernel.cu'
             ]),
-        make_cuda_ext(
-            name='affine_grid_ext',
-            module='mmdet.ops.affine_grid',
-            sources=[
-                'src/affine_grid_ext.cpp', 'src/cpu/affine_grid_cpu.cpp'
-            ]),
-        make_cuda_ext(
-            name='grid_sampler_ext',
-            module='mmdet.ops.grid_sampler',
-            sources=[
-                'src/grid_sampler_ext.cpp', 'src/cpu/grid_sampler_cpu.cpp'
-            ],
-            sources_cuda=['src/cuda/grid_sampler_cuda.cu']),
         make_cuda_ext(
             name='carafe_ext',
             module='mmdet.ops.carafe',