From 03ca883c16afb2e4f1703c98b4d565ca6d803453 Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Tue, 1 Dec 2020 17:47:02 +0000 Subject: [PATCH 1/6] Syncing, where possible, the names of functions across devices. --- torchvision/csrc/cpu/nms_cpu.cpp | 4 ++-- torchvision/csrc/cuda/nms_cuda.cu | 8 ++++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/torchvision/csrc/cpu/nms_cpu.cpp b/torchvision/csrc/cpu/nms_cpu.cpp index 00a4c61db7a..05d089e5fde 100644 --- a/torchvision/csrc/cpu/nms_cpu.cpp +++ b/torchvision/csrc/cpu/nms_cpu.cpp @@ -1,7 +1,7 @@ #include "vision_cpu.h" template -at::Tensor nms_cpu_kernel( +at::Tensor nms_kernel( const at::Tensor& dets, const at::Tensor& scores, double iou_threshold) { @@ -95,7 +95,7 @@ at::Tensor nms_cpu( auto result = at::empty({0}, dets.options()); AT_DISPATCH_FLOATING_TYPES(dets.scalar_type(), "nms", [&] { - result = nms_cpu_kernel(dets, scores, iou_threshold); + result = nms_kernel(dets, scores, iou_threshold); }); return result; } diff --git a/torchvision/csrc/cuda/nms_cuda.cu b/torchvision/csrc/cuda/nms_cuda.cu index 548dc2f69cb..e12167fba8d 100644 --- a/torchvision/csrc/cuda/nms_cuda.cu +++ b/torchvision/csrc/cuda/nms_cuda.cu @@ -10,7 +10,10 @@ int const threadsPerBlock = sizeof(unsigned long long) * 8; template -__device__ inline bool devIoU(T const* const a, T const* const b, const float threshold) { +__device__ inline bool devIoU( + T const* const a, + T const* const b, + const float threshold) { T left = max(a[0], b[0]), right = min(a[2], b[2]); T top = max(a[1], b[1]), bottom = min(a[3], b[3]); T width = max(right - left, (T)0), height = max(bottom - top, (T)0); @@ -29,7 +32,8 @@ __global__ void nms_kernel( const int row_start = blockIdx.y; const int col_start = blockIdx.x; - if (row_start > col_start) return; + if (row_start > col_start) + return; const int row_size = min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); From 513b7d9ed1e44a96414d5bb2fbd74c9496b5da0a Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Tue, 1 Dec 2020 17:51:21 +0000 Subject: [PATCH 2/6] Adding all internal functions in anonymous namespaces. --- torchvision/csrc/cpu/nms_cpu.cpp | 4 ++++ torchvision/csrc/cuda/nms_cuda.cu | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/torchvision/csrc/cpu/nms_cpu.cpp b/torchvision/csrc/cpu/nms_cpu.cpp index 05d089e5fde..066ededcd1d 100644 --- a/torchvision/csrc/cpu/nms_cpu.cpp +++ b/torchvision/csrc/cpu/nms_cpu.cpp @@ -1,5 +1,7 @@ #include "vision_cpu.h" +namespace { + template at::Tensor nms_kernel( const at::Tensor& dets, @@ -69,6 +71,8 @@ at::Tensor nms_kernel( return keep_t.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep); } +} // namespace + at::Tensor nms_cpu( const at::Tensor& dets, const at::Tensor& scores, diff --git a/torchvision/csrc/cuda/nms_cuda.cu b/torchvision/csrc/cuda/nms_cuda.cu index e12167fba8d..98efb92870c 100644 --- a/torchvision/csrc/cuda/nms_cuda.cu +++ b/torchvision/csrc/cuda/nms_cuda.cu @@ -7,6 +7,8 @@ #include #include +namespace { + int const threadsPerBlock = sizeof(unsigned long long) * 8; template @@ -72,6 +74,8 @@ __global__ void nms_kernel( } } +} // namespace + at::Tensor nms_cuda(const at::Tensor& dets, const at::Tensor& scores, double iou_threshold) { From e2ff902c024078ce5f4339043385cae4f687977c Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Tue, 1 Dec 2020 18:01:23 +0000 Subject: [PATCH 3/6] Renaming C++/CUDA kernel files and moving operator code from header to cpp file. --- torchvision/csrc/cpu/{nms_cpu.cpp => nms_kernel.cpp} | 0 torchvision/csrc/cuda/{nms_cuda.cu => nms_kernel.cu} | 0 torchvision/csrc/{nms.h => nms.cpp} | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename torchvision/csrc/cpu/{nms_cpu.cpp => nms_kernel.cpp} (100%) rename torchvision/csrc/cuda/{nms_cuda.cu => nms_kernel.cu} (100%) rename torchvision/csrc/{nms.h => nms.cpp} (100%) diff --git a/torchvision/csrc/cpu/nms_cpu.cpp b/torchvision/csrc/cpu/nms_kernel.cpp similarity index 100% rename from torchvision/csrc/cpu/nms_cpu.cpp rename to torchvision/csrc/cpu/nms_kernel.cpp diff --git a/torchvision/csrc/cuda/nms_cuda.cu b/torchvision/csrc/cuda/nms_kernel.cu similarity index 100% rename from torchvision/csrc/cuda/nms_cuda.cu rename to torchvision/csrc/cuda/nms_kernel.cu diff --git a/torchvision/csrc/nms.h b/torchvision/csrc/nms.cpp similarity index 100% rename from torchvision/csrc/nms.h rename to torchvision/csrc/nms.cpp From 8b86fbec110cce4655a8594fa1ec4bd0144c832a Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Tue, 1 Dec 2020 18:21:59 +0000 Subject: [PATCH 4/6] Create foreach cpp file a separate header file with "public" functions. --- torchvision/csrc/cpu/nms_kernel.cpp | 2 +- torchvision/csrc/cpu/nms_kernel.h | 9 +++++++++ torchvision/csrc/cpu/vision_cpu.h | 5 ----- torchvision/csrc/cuda/nms_kernel.cu | 1 + torchvision/csrc/cuda/nms_kernel.h | 9 +++++++++ torchvision/csrc/cuda/vision_cuda.h | 5 ----- torchvision/csrc/nms.cpp | 15 ++++----------- torchvision/csrc/nms.h | 24 ++++++++++++++++++++++++ 8 files changed, 48 insertions(+), 22 deletions(-) create mode 100644 torchvision/csrc/cpu/nms_kernel.h create mode 100644 torchvision/csrc/cuda/nms_kernel.h create mode 100644 torchvision/csrc/nms.h diff --git a/torchvision/csrc/cpu/nms_kernel.cpp b/torchvision/csrc/cpu/nms_kernel.cpp index 066ededcd1d..036a91f56dc 100644 --- a/torchvision/csrc/cpu/nms_kernel.cpp +++ b/torchvision/csrc/cpu/nms_kernel.cpp @@ -1,4 +1,4 @@ -#include "vision_cpu.h" +#include "nms_kernel.h" namespace { diff --git a/torchvision/csrc/cpu/nms_kernel.h b/torchvision/csrc/cpu/nms_kernel.h new file mode 100644 index 00000000000..7b6ef442626 --- /dev/null +++ b/torchvision/csrc/cpu/nms_kernel.h @@ -0,0 +1,9 @@ +#pragma once + +#include +#include "../macros.h" + +VISION_API at::Tensor nms_cpu( + const at::Tensor& dets, + const at::Tensor& scores, + double iou_threshold); diff --git a/torchvision/csrc/cpu/vision_cpu.h b/torchvision/csrc/cpu/vision_cpu.h index 6f85d9c0256..39d89bf6515 100644 --- a/torchvision/csrc/cpu/vision_cpu.h +++ b/torchvision/csrc/cpu/vision_cpu.h @@ -4,11 +4,6 @@ // TODO: Delete this file once all the methods are gone -VISION_API at::Tensor nms_cpu( - const at::Tensor& dets, - const at::Tensor& scores, - double iou_threshold); - VISION_API std::tuple PSROIAlign_forward_cpu( const at::Tensor& input, const at::Tensor& rois, diff --git a/torchvision/csrc/cuda/nms_kernel.cu b/torchvision/csrc/cuda/nms_kernel.cu index 98efb92870c..6fa58bfc329 100644 --- a/torchvision/csrc/cuda/nms_kernel.cu +++ b/torchvision/csrc/cuda/nms_kernel.cu @@ -3,6 +3,7 @@ #include #include "cuda_helpers.h" +#include "nms_kernel.h" #include #include diff --git a/torchvision/csrc/cuda/nms_kernel.h b/torchvision/csrc/cuda/nms_kernel.h new file mode 100644 index 00000000000..1eceddaccf3 --- /dev/null +++ b/torchvision/csrc/cuda/nms_kernel.h @@ -0,0 +1,9 @@ +#pragma once + +#include +#include "../macros.h" + +VISION_API at::Tensor nms_cuda( + const at::Tensor& dets, + const at::Tensor& scores, + double iou_threshold); diff --git a/torchvision/csrc/cuda/vision_cuda.h b/torchvision/csrc/cuda/vision_cuda.h index 834973c5327..b17f00d6acf 100644 --- a/torchvision/csrc/cuda/vision_cuda.h +++ b/torchvision/csrc/cuda/vision_cuda.h @@ -4,11 +4,6 @@ // TODO: Delete this file once all the methods are gone -VISION_API at::Tensor nms_cuda( - const at::Tensor& dets, - const at::Tensor& scores, - double iou_threshold); - VISION_API std::tuple PSROIAlign_forward_cuda( const at::Tensor& input, const at::Tensor& rois, diff --git a/torchvision/csrc/nms.cpp b/torchvision/csrc/nms.cpp index aed675e5d26..075f3101937 100644 --- a/torchvision/csrc/nms.cpp +++ b/torchvision/csrc/nms.cpp @@ -1,17 +1,10 @@ -#pragma once +#include "nms.h" +#include -#include "cpu/vision_cpu.h" - -#ifdef WITH_CUDA -#include "autocast.h" -#include "cuda/vision_cuda.h" -#endif -#ifdef WITH_HIP -#include "autocast.h" -#include "hip/vision_cuda.h" +#if defined(WITH_CUDA) || defined(WITH_HIP) +#include #endif -// nms dispatch nexus at::Tensor nms( const at::Tensor& dets, const at::Tensor& scores, diff --git a/torchvision/csrc/nms.h b/torchvision/csrc/nms.h new file mode 100644 index 00000000000..87b07548454 --- /dev/null +++ b/torchvision/csrc/nms.h @@ -0,0 +1,24 @@ +#pragma once + +#include "cpu/nms_kernel.h" + +#ifdef WITH_CUDA +#include "cuda/nms_kernel.h" +#endif +#ifdef WITH_HIP +#include "hip/nms_kernel.h" +#endif + +// C++ Forward +at::Tensor nms( + const at::Tensor& dets, + const at::Tensor& scores, + double iou_threshold); + +// Autocast Forward +#if defined(WITH_CUDA) || defined(WITH_HIP) +at::Tensor nms_autocast( + const at::Tensor& dets, + const at::Tensor& scores, + double iou_threshold); +#endif From c57c460356ee1bee5f876894874c2e966c88bc56 Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Tue, 1 Dec 2020 18:34:41 +0000 Subject: [PATCH 5/6] Removing unnecessary repeated includes. --- torchvision/csrc/cuda/nms_kernel.cu | 3 --- 1 file changed, 3 deletions(-) diff --git a/torchvision/csrc/cuda/nms_kernel.cu b/torchvision/csrc/cuda/nms_kernel.cu index 6fa58bfc329..8785bd84897 100644 --- a/torchvision/csrc/cuda/nms_kernel.cu +++ b/torchvision/csrc/cuda/nms_kernel.cu @@ -5,9 +5,6 @@ #include "cuda_helpers.h" #include "nms_kernel.h" -#include -#include - namespace { int const threadsPerBlock = sizeof(unsigned long long) * 8; From 7bbaa39c1e5eba064e9167977dfb24ed9807557a Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Tue, 1 Dec 2020 19:48:55 +0000 Subject: [PATCH 6/6] Update CMakeLists.txt to include all headers. --- CMakeLists.txt | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 81ca559d530..e6b97786888 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -32,9 +32,11 @@ file(GLOB HEADERS torchvision/csrc/*.h) # Image extension file(GLOB IMAGE_HEADERS torchvision/csrc/cpu/image/*.h) file(GLOB IMAGE_SOURCES torchvision/csrc/cpu/image/*.cpp) -file(GLOB OPERATOR_SOURCES torchvision/csrc/cpu/*.h torchvision/csrc/cpu/*.cpp ${IMAGE_HEADERS} ${IMAGE_SOURCES} ${HEADERS} torchvision/csrc/*.cpp) +file(GLOB OPERATOR_HEADERS torchvision/csrc/cpu/*.h) +file(GLOB OPERATOR_SOURCES ${OPERATOR_HEADERS} torchvision/csrc/cpu/*.cpp ${IMAGE_HEADERS} ${IMAGE_SOURCES} ${HEADERS} torchvision/csrc/*.cpp) if(WITH_CUDA) - file(GLOB OPERATOR_SOURCES ${OPERATOR_SOURCES} torchvision/csrc/cuda/*.h torchvision/csrc/cuda/*.cu) + file(GLOB OPERATOR_HEADERS ${OPERATOR_HEADERS} torchvision/csrc/cuda/*.h) + file(GLOB OPERATOR_SOURCES ${OPERATOR_SOURCES} ${OPERATOR_HEADERS} torchvision/csrc/cuda/*.cu) endif() file(GLOB MODELS_HEADERS torchvision/csrc/models/*.h) file(GLOB MODELS_SOURCES torchvision/csrc/models/*.h torchvision/csrc/models/*.cpp) @@ -95,11 +97,11 @@ install(EXPORT TorchVisionTargets install(FILES ${HEADERS} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}) install(FILES - torchvision/csrc/cpu/vision_cpu.h + ${OPERATOR_HEADERS} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}/cpu) if(WITH_CUDA) install(FILES - torchvision/csrc/cuda/vision_cuda.h + ${OPERATOR_HEADERS} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}/cuda) endif() install(FILES ${MODELS_HEADERS} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}/models)