From abd3ca8bf8c452a5b4c3a883655de28120f8ca06 Mon Sep 17 00:00:00 2001 From: cudawarped <12133430+cudawarped@users.noreply.github.com> Date: Tue, 10 Jan 2023 08:43:36 +0200 Subject: [PATCH 1/2] cudev: fix 1D error introduced in PR 3378 --- .../cudalegacy/src/cuda/NCVBroxOpticalFlow.cu | 16 ++--- modules/cudawarping/test/test_remap.cpp | 4 +- .../include/opencv2/cudev/ptr2d/texture.hpp | 69 +++++++++++++------ 3 files changed, 57 insertions(+), 32 deletions(-) diff --git a/modules/cudalegacy/src/cuda/NCVBroxOpticalFlow.cu b/modules/cudalegacy/src/cuda/NCVBroxOpticalFlow.cu index a7f83c715d0..3a527a010c3 100644 --- a/modules/cudalegacy/src/cuda/NCVBroxOpticalFlow.cu +++ b/modules/cudalegacy/src/cuda/NCVBroxOpticalFlow.cu @@ -876,17 +876,17 @@ NCVStatus NCVBroxOpticalFlow(const NCVBroxOpticalFlowDescriptor desc, Texture texIyy(kLevelHeight, kLevelWidth, Iyy.ptr(), kPitchTex, true, cudaFilterModeLinear, cudaAddressModeMirror); Texture texIy0(kLevelHeight, kLevelWidth, Iy0.ptr(), kPitchTex, true, cudaFilterModeLinear, cudaAddressModeMirror); Texture texIxy(kLevelHeight, kLevelWidth, Ixy.ptr(), kPitchTex, true, cudaFilterModeLinear, cudaAddressModeMirror); - Texture texDiffX(1, kLevelSizeInBytes / sizeof(float), diffusivity_x.ptr(), kLevelSizeInBytes); - Texture texDiffY(1, kLevelSizeInBytes / sizeof(float), diffusivity_y.ptr(), kLevelSizeInBytes); + Texture texDiffX(kLevelSizeInBytes, diffusivity_x.ptr()); + Texture texDiffY(kLevelSizeInBytes, diffusivity_y.ptr()); // flow - Texture texU(1, kLevelSizeInBytes / sizeof(float), ptrU->ptr(), kLevelSizeInBytes); - Texture texV(1, kLevelSizeInBytes / sizeof(float), ptrV->ptr(), kLevelSizeInBytes); + Texture texU(kLevelSizeInBytes, ptrU->ptr()); + Texture texV(kLevelSizeInBytes, ptrV->ptr()); // flow increments - Texture texDu(1, kLevelSizeInBytes / sizeof(float), du.ptr(), kLevelSizeInBytes); - Texture texDv(1, kLevelSizeInBytes / sizeof(float), dv.ptr(), kLevelSizeInBytes); - Texture texDuNew(1, kLevelSizeInBytes / sizeof(float), du_new.ptr(), kLevelSizeInBytes); - Texture texDvNew(1, kLevelSizeInBytes / sizeof(float), dv_new.ptr(), kLevelSizeInBytes); + Texture texDu(kLevelSizeInBytes, du.ptr()); + Texture texDv(kLevelSizeInBytes, dv.ptr()); + Texture texDuNew(kLevelSizeInBytes, du_new.ptr()); + Texture texDvNew(kLevelSizeInBytes, dv_new.ptr()); dim3 psor_blocks(iDivUp(kLevelWidth, PSOR_TILE_WIDTH), iDivUp(kLevelHeight, PSOR_TILE_HEIGHT)); dim3 psor_threads(PSOR_TILE_WIDTH, PSOR_TILE_HEIGHT); diff --git a/modules/cudawarping/test/test_remap.cpp b/modules/cudawarping/test/test_remap.cpp index ec7586638eb..b751072bbec 100644 --- a/modules/cudawarping/test/test_remap.cpp +++ b/modules/cudawarping/test/test_remap.cpp @@ -173,7 +173,7 @@ CUDA_TEST_P(Remap, Accuracy) INSTANTIATE_TEST_CASE_P(CUDA_Warping, Remap, testing::Combine( ALL_DEVICES, - DIFFERENT_SIZES, + DIFFERENT_SIZES_EXTRA, testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)), testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_CONSTANT), BorderType(cv::BORDER_REFLECT), BorderType(cv::BORDER_WRAP)), @@ -198,7 +198,7 @@ CUDA_TEST_P(RemapOutOfScope, Regression_18224) INSTANTIATE_TEST_CASE_P(CUDA_Warping, RemapOutOfScope, testing::Combine( ALL_DEVICES, - DIFFERENT_SIZES, + DIFFERENT_SIZES_EXTRA, testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)), testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR)), testing::Values(BorderType(cv::BORDER_CONSTANT)), diff --git a/modules/cudev/include/opencv2/cudev/ptr2d/texture.hpp b/modules/cudev/include/opencv2/cudev/ptr2d/texture.hpp index 078373a4436..27a26102e29 100644 --- a/modules/cudev/include/opencv2/cudev/ptr2d/texture.hpp +++ b/modules/cudev/include/opencv2/cudev/ptr2d/texture.hpp @@ -94,13 +94,14 @@ namespace cv { namespace cudev { __host__ UniqueTexture(const size_t sizeInBytes, T* data, const bool normalizedCoords = false, const cudaTextureFilterMode filterMode = cudaFilterModePoint, const cudaTextureAddressMode addressMode = cudaAddressModeClamp, const cudaTextureReadMode readMode = cudaReadModeElementType) { - create(1, static_cast(sizeInBytes/sizeof(T)), data, sizeInBytes, normalizedCoords, filterMode, addressMode, readMode); + create(sizeInBytes, data, normalizedCoords, filterMode, addressMode, readMode); } __host__ ~UniqueTexture() { if (tex != cudaTextureObject_t()) { try { CV_CUDEV_SAFE_CALL(cudaDestroyTextureObject(tex)); + CV_CUDEV_SAFE_CALL(cudaFree(internalSrc)); } catch (const cv::Exception& ex) { std::ostringstream os; @@ -132,6 +133,38 @@ namespace cv { namespace cudev { __host__ explicit operator bool() const noexcept { return tex != cudaTextureObject_t(); } private: + __host__ void createTextureObject(cudaResourceDesc texRes, const bool normalizedCoords, const cudaTextureFilterMode filterMode, + const cudaTextureAddressMode addressMode, const cudaTextureReadMode readMode) + { + cudaTextureDesc texDescr; + std::memset(&texDescr, 0, sizeof(texDescr)); + texDescr.normalizedCoords = normalizedCoords; + texDescr.filterMode = filterMode; + texDescr.addressMode[0] = addressMode; + texDescr.addressMode[1] = addressMode; + texDescr.addressMode[2] = addressMode; + texDescr.readMode = readMode; + CV_CUDEV_SAFE_CALL(cudaCreateTextureObject(&tex, &texRes, &texDescr, 0)); + } + + template + __host__ void create(const size_t sizeInBytes, T1* data, const bool normalizedCoords, const cudaTextureFilterMode filterMode, + const cudaTextureAddressMode addressMode, const cudaTextureReadMode readMode) + { + cudaResourceDesc texRes; + std::memset(&texRes, 0, sizeof(texRes)); + texRes.resType = cudaResourceTypeLinear; + texRes.res.linear.devPtr = data; + texRes.res.linear.sizeInBytes = sizeInBytes; + texRes.res.linear.desc = cudaCreateChannelDesc(); + createTextureObject(texRes, normalizedCoords, filterMode, addressMode, readMode); + } + + __host__ void create(const size_t sizeInBytes, uint64* data, const bool normalizedCoords, const cudaTextureFilterMode filterMode, + const cudaTextureAddressMode addressMode, const cudaTextureReadMode readMode) + { + create(sizeInBytes, (uint2*)data, normalizedCoords, filterMode, addressMode, readMode); + } template __host__ void create(const int rows, const int cols, T1* data, const size_t step, const bool normalizedCoords, const cudaTextureFilterMode filterMode, @@ -139,32 +172,23 @@ namespace cv { namespace cudev { { cudaResourceDesc texRes; std::memset(&texRes, 0, sizeof(texRes)); - if (rows == 1) { - CV_Assert(rows == 1 && cols*sizeof(T) == step); - texRes.resType = cudaResourceTypeLinear; - texRes.res.linear.devPtr = data; - texRes.res.linear.sizeInBytes = step; - texRes.res.linear.desc = cudaCreateChannelDesc(); + texRes.resType = cudaResourceTypePitch2D; + texRes.res.pitch2D.height = rows; + texRes.res.pitch2D.width = cols; + // temporary fix for single row/columns until TexturePtr is reworked + if (rows == 1 || cols == 1) { + size_t dStep = 0; + CV_CUDEV_SAFE_CALL(cudaMallocPitch(&internalSrc, &dStep, cols * sizeof(T1), rows)); + CV_CUDEV_SAFE_CALL(cudaMemcpy2D(internalSrc, dStep, data, step, cols * sizeof(T1), rows, cudaMemcpyDeviceToDevice)); + texRes.res.pitch2D.devPtr = internalSrc; + texRes.res.pitch2D.pitchInBytes = dStep; } else { - texRes.resType = cudaResourceTypePitch2D; texRes.res.pitch2D.devPtr = data; - texRes.res.pitch2D.height = rows; - texRes.res.pitch2D.width = cols; texRes.res.pitch2D.pitchInBytes = step; - texRes.res.pitch2D.desc = cudaCreateChannelDesc(); } - - cudaTextureDesc texDescr; - std::memset(&texDescr, 0, sizeof(texDescr)); - texDescr.normalizedCoords = normalizedCoords; - texDescr.filterMode = filterMode; - texDescr.addressMode[0] = addressMode; - texDescr.addressMode[1] = addressMode; - texDescr.addressMode[2] = addressMode; - texDescr.readMode = readMode; - - CV_CUDEV_SAFE_CALL(cudaCreateTextureObject(&tex, &texRes, &texDescr, 0)); + texRes.res.pitch2D.desc = cudaCreateChannelDesc(); + createTextureObject(texRes, normalizedCoords, filterMode, addressMode, readMode); } __host__ void create(const int rows, const int cols, uint64* data, const size_t step, const bool normalizedCoords, const cudaTextureFilterMode filterMode, @@ -175,6 +199,7 @@ namespace cv { namespace cudev { private: cudaTextureObject_t tex; + T* internalSrc = 0; }; /** @brief sharable smart CUDA texture object From f48a261d8be0737cbef36da877b18754e7ca2014 Mon Sep 17 00:00:00 2001 From: Tomoaki Teshima Date: Fri, 13 Jan 2023 22:43:20 +0900 Subject: [PATCH 2/2] fix warnings --- .../include/opencv2/cudev/ptr2d/texture.hpp | 4 ++-- modules/xfeatures2d/src/cuda/surf.cu | 20 +++++++++++-------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/modules/cudev/include/opencv2/cudev/ptr2d/texture.hpp b/modules/cudev/include/opencv2/cudev/ptr2d/texture.hpp index 078373a4436..bb7d6b0518c 100644 --- a/modules/cudev/include/opencv2/cudev/ptr2d/texture.hpp +++ b/modules/cudev/include/opencv2/cudev/ptr2d/texture.hpp @@ -250,9 +250,9 @@ namespace cv { namespace cudev { { } - __host__ TextureOff(PtrStepSz src, const int yoff = 0, const int xoff = 0, const bool normalizedCoords = false, const cudaTextureFilterMode filterMode = cudaFilterModePoint, + __host__ TextureOff(PtrStepSz src, const int yoff_ = 0, const int xoff_ = 0, const bool normalizedCoords = false, const cudaTextureFilterMode filterMode = cudaFilterModePoint, const cudaTextureAddressMode addressMode = cudaAddressModeClamp, const cudaTextureReadMode readMode = cudaReadModeElementType) : - TextureOff(src.rows, src.cols, src.data, src.step, yoff, xoff, normalizedCoords, filterMode, addressMode, readMode) + TextureOff(src.rows, src.cols, src.data, src.step, yoff_, xoff_, normalizedCoords, filterMode, addressMode, readMode) { } diff --git a/modules/xfeatures2d/src/cuda/surf.cu b/modules/xfeatures2d/src/cuda/surf.cu index 6576ca736ce..b8ef4d627e4 100644 --- a/modules/xfeatures2d/src/cuda/surf.cu +++ b/modules/xfeatures2d/src/cuda/surf.cu @@ -233,17 +233,21 @@ namespace cv { namespace cuda { namespace device __host__ Mask(cudev::TexturePtr tex_): tex(tex_) {}; __device__ bool check(int sum_i, int sum_j, int size) { - if (!useMask) return true; - float ratio = (float)size / 9.0f; - + int dx1 = 0; + int dy1 = 0; + int dx2 = 0; + int dy2 = 0; + float ratio = 0; float d = 0; + float t = 0; - int dx1 = __float2int_rn(ratio * c_DM[0]); - int dy1 = __float2int_rn(ratio * c_DM[1]); - int dx2 = __float2int_rn(ratio * c_DM[2]); - int dy2 = __float2int_rn(ratio * c_DM[3]); + if (!useMask) return true; + ratio = (float)size / 9.0f; + dx1 = __float2int_rn(ratio * c_DM[0]); + dy1 = __float2int_rn(ratio * c_DM[1]); + dx2 = __float2int_rn(ratio * c_DM[2]); + dy2 = __float2int_rn(ratio * c_DM[3]); - float t = 0; t += tex(sum_i + dy1, sum_j + dx1); t -= tex(sum_i + dy2, sum_j + dx1); t -= tex(sum_i + dy1, sum_j + dx2);