opencv · alalek · Jan 18, 2023 · Jan 10, 2023 · Jan 13, 2023 · Jan 13, 2023
diff --git a/modules/cudalegacy/src/cuda/NCVBroxOpticalFlow.cu b/modules/cudalegacy/src/cuda/NCVBroxOpticalFlow.cu
@@ -876,17 +876,17 @@ NCVStatus NCVBroxOpticalFlow(const NCVBroxOpticalFlowDescriptor desc,
             Texture texIyy(kLevelHeight, kLevelWidth, Iyy.ptr(), kPitchTex, true, cudaFilterModeLinear, cudaAddressModeMirror);
             Texture texIy0(kLevelHeight, kLevelWidth, Iy0.ptr(), kPitchTex, true, cudaFilterModeLinear, cudaAddressModeMirror);
             Texture texIxy(kLevelHeight, kLevelWidth, Ixy.ptr(), kPitchTex, true, cudaFilterModeLinear, cudaAddressModeMirror);
-            Texture texDiffX(1, kLevelSizeInBytes / sizeof(float), diffusivity_x.ptr(), kLevelSizeInBytes);
-            Texture texDiffY(1, kLevelSizeInBytes / sizeof(float), diffusivity_y.ptr(), kLevelSizeInBytes);
+            Texture texDiffX(kLevelSizeInBytes, diffusivity_x.ptr());
+            Texture texDiffY(kLevelSizeInBytes, diffusivity_y.ptr());
 
             //    flow
-            Texture texU(1, kLevelSizeInBytes / sizeof(float), ptrU->ptr(), kLevelSizeInBytes);
-            Texture texV(1, kLevelSizeInBytes / sizeof(float), ptrV->ptr(), kLevelSizeInBytes);
+            Texture texU(kLevelSizeInBytes, ptrU->ptr());
+            Texture texV(kLevelSizeInBytes, ptrV->ptr());
             //    flow increments
-            Texture texDu(1, kLevelSizeInBytes / sizeof(float), du.ptr(), kLevelSizeInBytes);
-            Texture texDv(1, kLevelSizeInBytes / sizeof(float), dv.ptr(), kLevelSizeInBytes);
-            Texture texDuNew(1, kLevelSizeInBytes / sizeof(float), du_new.ptr(), kLevelSizeInBytes);
-            Texture texDvNew(1, kLevelSizeInBytes / sizeof(float), dv_new.ptr(), kLevelSizeInBytes);
+            Texture texDu(kLevelSizeInBytes, du.ptr());
+            Texture texDv(kLevelSizeInBytes, dv.ptr());
+            Texture texDuNew(kLevelSizeInBytes, du_new.ptr());
+            Texture texDvNew(kLevelSizeInBytes, dv_new.ptr());
 
             dim3 psor_blocks(iDivUp(kLevelWidth, PSOR_TILE_WIDTH), iDivUp(kLevelHeight, PSOR_TILE_HEIGHT));
             dim3 psor_threads(PSOR_TILE_WIDTH, PSOR_TILE_HEIGHT);

diff --git a/modules/cudawarping/test/test_remap.cpp b/modules/cudawarping/test/test_remap.cpp
@@ -173,7 +173,7 @@ CUDA_TEST_P(Remap, Accuracy)
 
 INSTANTIATE_TEST_CASE_P(CUDA_Warping, Remap, testing::Combine(
     ALL_DEVICES,
-    DIFFERENT_SIZES,
+    DIFFERENT_SIZES_EXTRA,
     testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
     testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)),
     testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_CONSTANT), BorderType(cv::BORDER_REFLECT), BorderType(cv::BORDER_WRAP)),
@@ -198,7 +198,7 @@ CUDA_TEST_P(RemapOutOfScope, Regression_18224)
 
 INSTANTIATE_TEST_CASE_P(CUDA_Warping, RemapOutOfScope, testing::Combine(
         ALL_DEVICES,
-        DIFFERENT_SIZES,
+        DIFFERENT_SIZES_EXTRA,
         testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
         testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR)),
         testing::Values(BorderType(cv::BORDER_CONSTANT)),

diff --git a/modules/cudev/include/opencv2/cudev/ptr2d/texture.hpp b/modules/cudev/include/opencv2/cudev/ptr2d/texture.hpp
@@ -94,13 +94,14 @@ namespace cv {  namespace cudev {
         __host__ UniqueTexture(const size_t sizeInBytes, T* data, const bool normalizedCoords = false, const cudaTextureFilterMode filterMode = cudaFilterModePoint,
             const cudaTextureAddressMode addressMode = cudaAddressModeClamp, const cudaTextureReadMode readMode = cudaReadModeElementType)
         {
-            create(1, static_cast<int>(sizeInBytes/sizeof(T)), data, sizeInBytes, normalizedCoords, filterMode, addressMode, readMode);
+            create(sizeInBytes, data, normalizedCoords, filterMode, addressMode, readMode);
         }
 
         __host__ ~UniqueTexture() {
             if (tex != cudaTextureObject_t()) {
                 try {
                     CV_CUDEV_SAFE_CALL(cudaDestroyTextureObject(tex));
+                    CV_CUDEV_SAFE_CALL(cudaFree(internalSrc));
                 }
                 catch (const cv::Exception& ex) {
                     std::ostringstream os;
@@ -132,39 +133,62 @@ namespace cv {  namespace cudev {
         __host__ explicit operator bool() const noexcept { return tex != cudaTextureObject_t(); }
 
     private:
+        __host__ void createTextureObject(cudaResourceDesc texRes, const bool normalizedCoords, const cudaTextureFilterMode filterMode,
+            const cudaTextureAddressMode addressMode, const cudaTextureReadMode readMode)
+        {
+            cudaTextureDesc texDescr;
+            std::memset(&texDescr, 0, sizeof(texDescr));
+            texDescr.normalizedCoords = normalizedCoords;
+            texDescr.filterMode = filterMode;
+            texDescr.addressMode[0] = addressMode;
+            texDescr.addressMode[1] = addressMode;
+            texDescr.addressMode[2] = addressMode;
+            texDescr.readMode = readMode;
+            CV_CUDEV_SAFE_CALL(cudaCreateTextureObject(&tex, &texRes, &texDescr, 0));
+        }
+
+        template <class T1>
+        __host__ void create(const size_t sizeInBytes, T1* data, const bool normalizedCoords, const cudaTextureFilterMode filterMode,
+            const cudaTextureAddressMode addressMode, const cudaTextureReadMode readMode)
+        {
+            cudaResourceDesc texRes;
+            std::memset(&texRes, 0, sizeof(texRes));
+            texRes.resType = cudaResourceTypeLinear;
+            texRes.res.linear.devPtr = data;
+            texRes.res.linear.sizeInBytes = sizeInBytes;
+            texRes.res.linear.desc = cudaCreateChannelDesc<T1>();
+            createTextureObject(texRes, normalizedCoords, filterMode, addressMode, readMode);
+        }
+
+        __host__ void create(const size_t sizeInBytes, uint64* data, const bool normalizedCoords, const cudaTextureFilterMode filterMode,
+            const cudaTextureAddressMode addressMode, const cudaTextureReadMode readMode)
+        {
+            create<uint2>(sizeInBytes, (uint2*)data, normalizedCoords, filterMode, addressMode, readMode);
+        }
 
         template <class T1>
         __host__ void create(const int rows, const int cols, T1* data, const size_t step, const bool normalizedCoords, const cudaTextureFilterMode filterMode,
             const cudaTextureAddressMode addressMode, const cudaTextureReadMode readMode)
         {
             cudaResourceDesc texRes;
             std::memset(&texRes, 0, sizeof(texRes));
-            if (rows == 1) {
-                CV_Assert(rows == 1 && cols*sizeof(T) == step);
-                texRes.resType = cudaResourceTypeLinear;
-                texRes.res.linear.devPtr = data;
-                texRes.res.linear.sizeInBytes = step;
-                texRes.res.linear.desc = cudaCreateChannelDesc<T1>();
+            texRes.resType = cudaResourceTypePitch2D;
+            texRes.res.pitch2D.height = rows;
+            texRes.res.pitch2D.width = cols;
+            // temporary fix for single row/columns until TexturePtr is reworked
+            if (rows == 1 || cols == 1) {
+                size_t dStep = 0;
+                CV_CUDEV_SAFE_CALL(cudaMallocPitch(&internalSrc, &dStep, cols * sizeof(T1), rows));
+                CV_CUDEV_SAFE_CALL(cudaMemcpy2D(internalSrc, dStep, data, step, cols * sizeof(T1), rows, cudaMemcpyDeviceToDevice));
+                texRes.res.pitch2D.devPtr = internalSrc;
+                texRes.res.pitch2D.pitchInBytes = dStep;
             }
             else {
-                texRes.resType = cudaResourceTypePitch2D;
                 texRes.res.pitch2D.devPtr = data;
-                texRes.res.pitch2D.height = rows;
-                texRes.res.pitch2D.width = cols;
                 texRes.res.pitch2D.pitchInBytes = step;
-                texRes.res.pitch2D.desc = cudaCreateChannelDesc<T1>();
             }
-
-            cudaTextureDesc texDescr;
-            std::memset(&texDescr, 0, sizeof(texDescr));
-            texDescr.normalizedCoords = normalizedCoords;
-            texDescr.filterMode = filterMode;
-            texDescr.addressMode[0] = addressMode;
-            texDescr.addressMode[1] = addressMode;
-            texDescr.addressMode[2] = addressMode;
-            texDescr.readMode = readMode;
-
-            CV_CUDEV_SAFE_CALL(cudaCreateTextureObject(&tex, &texRes, &texDescr, 0));
+            texRes.res.pitch2D.desc = cudaCreateChannelDesc<T1>();
+            createTextureObject(texRes, normalizedCoords, filterMode, addressMode, readMode);
         }
 
         __host__ void create(const int rows, const int cols, uint64* data, const size_t step, const bool normalizedCoords, const cudaTextureFilterMode filterMode,
@@ -175,6 +199,7 @@ namespace cv {  namespace cudev {
 
     private:
         cudaTextureObject_t tex;
+        T* internalSrc = 0;
     };
 
     /** @brief sharable smart CUDA texture object
@@ -250,9 +275,9 @@ namespace cv {  namespace cudev {
         {
         }
 
-        __host__ TextureOff(PtrStepSz<T> src, const int yoff = 0, const int xoff = 0, const bool normalizedCoords = false, const cudaTextureFilterMode filterMode = cudaFilterModePoint,
+        __host__ TextureOff(PtrStepSz<T> src, const int yoff_ = 0, const int xoff_ = 0, const bool normalizedCoords = false, const cudaTextureFilterMode filterMode = cudaFilterModePoint,
             const cudaTextureAddressMode addressMode = cudaAddressModeClamp, const cudaTextureReadMode readMode = cudaReadModeElementType) :
-            TextureOff(src.rows, src.cols, src.data, src.step, yoff, xoff, normalizedCoords, filterMode, addressMode, readMode)
+            TextureOff(src.rows, src.cols, src.data, src.step, yoff_, xoff_, normalizedCoords, filterMode, addressMode, readMode)
         {
         }
 

diff --git a/modules/xfeatures2d/src/cuda/surf.cu b/modules/xfeatures2d/src/cuda/surf.cu
@@ -233,17 +233,21 @@ namespace cv { namespace cuda { namespace device
             __host__ Mask(cudev::TexturePtr<unsigned int> tex_): tex(tex_) {};
             __device__ bool check(int sum_i, int sum_j, int size)
             {
-                if (!useMask) return true;
-                float ratio = (float)size / 9.0f;
-
+                int dx1 = 0;
+                int dy1 = 0;
+                int dx2 = 0;
+                int dy2 = 0;
+                float ratio = 0;
                 float d = 0;
+                float t = 0;
 
-                int dx1 = __float2int_rn(ratio * c_DM[0]);
-                int dy1 = __float2int_rn(ratio * c_DM[1]);
-                int dx2 = __float2int_rn(ratio * c_DM[2]);
-                int dy2 = __float2int_rn(ratio * c_DM[3]);
+                if (!useMask) return true;
+                ratio = (float)size / 9.0f;
+                dx1 = __float2int_rn(ratio * c_DM[0]);
+                dy1 = __float2int_rn(ratio * c_DM[1]);
+                dx2 = __float2int_rn(ratio * c_DM[2]);
+                dy2 = __float2int_rn(ratio * c_DM[3]);
 
-                float t = 0;
                 t += tex(sum_i + dy1, sum_j + dx1);
                 t -= tex(sum_i + dy2, sum_j + dx1);
                 t -= tex(sum_i + dy1, sum_j + dx2);