Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,11 @@ file(GLOB HEADERS torchvision/csrc/*.h)
# Image extension
file(GLOB IMAGE_HEADERS torchvision/csrc/cpu/image/*.h)
file(GLOB IMAGE_SOURCES torchvision/csrc/cpu/image/*.cpp)
# NOTE(review): the result of the next glob is replaced two commands further
# down, where OPERATOR_SOURCES is assigned again without reading its previous
# value.
file(GLOB OPERATOR_SOURCES torchvision/csrc/cpu/*.h torchvision/csrc/cpu/*.cpp ${IMAGE_HEADERS} ${IMAGE_SOURCES} ${HEADERS} torchvision/csrc/*.cpp)
# Operator headers are collected in their own variable, starting with the CPU
# ones.
file(GLOB OPERATOR_HEADERS torchvision/csrc/cpu/*.h)
# Operator sources: the CPU headers and .cpp files, the image extension files,
# and the headers and .cpp files directly under torchvision/csrc.
file(GLOB OPERATOR_SOURCES ${OPERATOR_HEADERS} torchvision/csrc/cpu/*.cpp ${IMAGE_HEADERS} ${IMAGE_SOURCES} ${HEADERS} torchvision/csrc/*.cpp)
if(WITH_CUDA)
# With CUDA enabled, both lists are extended with the CUDA headers and the .cu
# files. Each glob passes the variable's current contents back in as glob
# expressions so that the earlier entries are carried over.
file(GLOB OPERATOR_SOURCES ${OPERATOR_SOURCES} torchvision/csrc/cuda/*.h torchvision/csrc/cuda/*.cu)
# From here on OPERATOR_HEADERS holds the CPU headers and the CUDA headers.
file(GLOB OPERATOR_HEADERS ${OPERATOR_HEADERS} torchvision/csrc/cuda/*.h)
file(GLOB OPERATOR_SOURCES ${OPERATOR_SOURCES} ${OPERATOR_HEADERS} torchvision/csrc/cuda/*.cu)
endif()
# Model headers; MODELS_SOURCES globs those same headers plus the model .cpp
# files.
file(GLOB MODELS_HEADERS torchvision/csrc/models/*.h)
file(GLOB MODELS_SOURCES torchvision/csrc/models/*.h torchvision/csrc/models/*.cpp)
Expand Down Expand Up @@ -95,11 +97,11 @@ install(EXPORT TorchVisionTargets

# Install the public headers, mirroring the source layout under
# ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}.
install(FILES ${HEADERS} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME})

# OPERATOR_HEADERS is a single list that holds the cpu/*.h headers and, when
# WITH_CUDA is on, the cuda/*.h headers as well. Installing the whole list into
# both the cpu/ and cuda/ destinations would place every header in both
# directories, so split it by parent directory first.
set(operator_cpu_headers "")
set(operator_cuda_headers "")
foreach(operator_header IN LISTS OPERATOR_HEADERS)
  if("${operator_header}" MATCHES "/cuda/[^/]+\\.h$")
    list(APPEND operator_cuda_headers "${operator_header}")
  elseif("${operator_header}" MATCHES "/cpu/[^/]+\\.h$")
    list(APPEND operator_cpu_headers "${operator_header}")
  endif()
endforeach()

install(FILES
  torchvision/csrc/cpu/vision_cpu.h
  ${operator_cpu_headers}
  DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}/cpu)
if(WITH_CUDA)
  install(FILES
    torchvision/csrc/cuda/vision_cuda.h
    ${operator_cuda_headers}
    DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}/cuda)
endif()
install(FILES ${MODELS_HEADERS} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}/models)
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
#include "vision_cpu.h"
#include "nms_kernel.h"

namespace {

template <typename scalar_t>
at::Tensor nms_cpu_kernel(
at::Tensor nms_kernel(
const at::Tensor& dets,
const at::Tensor& scores,
double iou_threshold) {
Expand Down Expand Up @@ -69,6 +71,8 @@ at::Tensor nms_cpu_kernel(
return keep_t.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep);
}

} // namespace

at::Tensor nms_cpu(
const at::Tensor& dets,
const at::Tensor& scores,
Expand All @@ -95,7 +99,7 @@ at::Tensor nms_cpu(
auto result = at::empty({0}, dets.options());

AT_DISPATCH_FLOATING_TYPES(dets.scalar_type(), "nms", [&] {
result = nms_cpu_kernel<scalar_t>(dets, scores, iou_threshold);
result = nms_kernel<scalar_t>(dets, scores, iou_threshold);
});
return result;
}
9 changes: 9 additions & 0 deletions torchvision/csrc/cpu/nms_kernel.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#pragma once

#include <ATen/ATen.h>
#include "../macros.h"

// Declaration of the CPU entry point for the nms operator, exported through
// VISION_API (from ../macros.h).
// Takes two tensors, `dets` and `scores`, and a scalar `iou_threshold`, and
// returns a tensor.
// NOTE(review): presumably `dets` holds the candidate boxes and the result
// holds the indices of the boxes that are kept; confirm against the
// definition in the matching .cpp file.
VISION_API at::Tensor nms_cpu(
const at::Tensor& dets,
const at::Tensor& scores,
double iou_threshold);
5 changes: 0 additions & 5 deletions torchvision/csrc/cpu/vision_cpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,6 @@

// TODO: Delete this file once all the methods are gone

VISION_API at::Tensor nms_cpu(
const at::Tensor& dets,
const at::Tensor& scores,
double iou_threshold);

VISION_API std::tuple<at::Tensor, at::Tensor> PSROIAlign_forward_cpu(
const at::Tensor& input,
const at::Tensor& rois,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,17 @@
#include <c10/cuda/CUDAGuard.h>

#include "cuda_helpers.h"
#include "nms_kernel.h"

#include <iostream>
#include <vector>
namespace {

int const threadsPerBlock = sizeof(unsigned long long) * 8;

template <typename T>
__device__ inline bool devIoU(T const* const a, T const* const b, const float threshold) {
__device__ inline bool devIoU(
T const* const a,
T const* const b,
const float threshold) {
T left = max(a[0], b[0]), right = min(a[2], b[2]);
T top = max(a[1], b[1]), bottom = min(a[3], b[3]);
T width = max(right - left, (T)0), height = max(bottom - top, (T)0);
Expand All @@ -29,7 +32,8 @@ __global__ void nms_kernel(
const int row_start = blockIdx.y;
const int col_start = blockIdx.x;

if (row_start > col_start) return;
if (row_start > col_start)
return;

const int row_size =
min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
Expand Down Expand Up @@ -68,6 +72,8 @@ __global__ void nms_kernel(
}
}

} // namespace

at::Tensor nms_cuda(const at::Tensor& dets,
const at::Tensor& scores,
double iou_threshold) {
Expand Down
9 changes: 9 additions & 0 deletions torchvision/csrc/cuda/nms_kernel.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#pragma once

#include <ATen/ATen.h>
#include "../macros.h"

// Declaration of the CUDA entry point for the nms operator, exported through
// VISION_API (from ../macros.h).
// Takes two tensors, `dets` and `scores`, and a scalar `iou_threshold`, and
// returns a tensor.
// NOTE(review): presumably `dets` holds the candidate boxes and the result
// holds the indices of the boxes that are kept; confirm against the
// definition in the matching .cu file.
VISION_API at::Tensor nms_cuda(
const at::Tensor& dets,
const at::Tensor& scores,
double iou_threshold);
5 changes: 0 additions & 5 deletions torchvision/csrc/cuda/vision_cuda.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,6 @@

// TODO: Delete this file once all the methods are gone

VISION_API at::Tensor nms_cuda(
const at::Tensor& dets,
const at::Tensor& scores,
double iou_threshold);

VISION_API std::tuple<at::Tensor, at::Tensor> PSROIAlign_forward_cuda(
const at::Tensor& input,
const at::Tensor& rois,
Expand Down
29 changes: 29 additions & 0 deletions torchvision/csrc/nms.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#include "nms.h"
#include <torch/extension.h>

#if defined(WITH_CUDA) || defined(WITH_HIP)
#include <ATen/autocast_mode.h>
#endif

// C++ front end for the "torchvision::nms" operator.
//
// Looks the operator up in the c10 dispatcher under the name
// "torchvision::nms" with an empty overload name, types the handle with this
// function's own signature, and forwards the three arguments to it. The
// lookup happens once; the typed handle is kept in a function-local static.
at::Tensor nms(
    const at::Tensor& dets,
    const at::Tensor& scores,
    double iou_threshold) {
  using nms_signature = decltype(nms);

  // Built on first use only; findSchemaOrThrow throws if the schema is not
  // registered.
  static auto typed_handle = []() {
    const auto schema_handle =
        c10::Dispatcher::singleton().findSchemaOrThrow("torchvision::nms", "");
    return schema_handle.typed<nms_signature>();
  }();

  return typed_handle.call(dets, scores, iou_threshold);
}

#if defined(WITH_CUDA) || defined(WITH_HIP)
// Autocast variant of nms.
//
// Excludes the Autocast dispatch key for the duration of the call, casts
// `dets` and `scores` to at::kFloat through at::autocast::cached_cast, and
// then calls nms with the cast tensors and the unchanged `iou_threshold`.
at::Tensor nms_autocast(
    const at::Tensor& dets,
    const at::Tensor& scores,
    double iou_threshold) {
  // Guard stays alive until the function returns, so the nested nms call is
  // dispatched with Autocast excluded.
  c10::impl::ExcludeDispatchKeyGuard autocast_excluded(
      c10::DispatchKey::Autocast);

  const at::Tensor dets_as_float = at::autocast::cached_cast(at::kFloat, dets);
  const at::Tensor scores_as_float =
      at::autocast::cached_cast(at::kFloat, scores);

  return nms(dets_as_float, scores_as_float, iou_threshold);
}
#endif
26 changes: 7 additions & 19 deletions torchvision/csrc/nms.h
Original file line number Diff line number Diff line change
@@ -1,36 +1,24 @@
#pragma once

#include "cpu/vision_cpu.h"
#include "cpu/nms_kernel.h"

#ifdef WITH_CUDA
#include "autocast.h"
#include "cuda/vision_cuda.h"
#include "cuda/nms_kernel.h"
#endif
#ifdef WITH_HIP
#include "autocast.h"
#include "hip/vision_cuda.h"
#include "hip/nms_kernel.h"
#endif

// nms dispatch nexus
// C++ Forward
at::Tensor nms(
const at::Tensor& dets,
const at::Tensor& scores,
double iou_threshold) {
static auto op = c10::Dispatcher::singleton()
.findSchemaOrThrow("torchvision::nms", "")
.typed<decltype(nms)>();
return op.call(dets, scores, iou_threshold);
}
double iou_threshold);

// Autocast Forward
#if defined(WITH_CUDA) || defined(WITH_HIP)
at::Tensor nms_autocast(
const at::Tensor& dets,
const at::Tensor& scores,
double iou_threshold) {
c10::impl::ExcludeDispatchKeyGuard no_autocast(c10::DispatchKey::Autocast);
return nms(
at::autocast::cached_cast(at::kFloat, dets),
at::autocast::cached_cast(at::kFloat, scores),
iou_threshold);
}
double iou_threshold);
#endif