Skip to content

Merge 3.4 #2013

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Feb 26, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion modules/cudev/include/opencv2/cudev/block/scan.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,12 @@ __device__ T blockScanInclusive(T data, volatile T* smem, uint tid)
}
else
{
// Read from smem[tid] (T val = smem[tid])
// and write to smem[tid + 1] (smem[tid + 1] = warpScanInclusive(mask, val))
// should be explicitly fenced by "__syncwarp" to get rid of
// "cuda-memcheck --tool racecheck" warnings.
__syncwarp(mask);

// calculate inclusive scan and write back to shared memory with offset 1
smem[tid + 1] = warpScanInclusive(mask, val);

Expand Down Expand Up @@ -197,10 +203,18 @@ __device__ T blockScanInclusive(T data, volatile T* smem, uint tid)

int quot = THREADS_NUM / WARP_SIZE;

T val;

if (tid < quot)
{
// grab top warp elements
T val = smem[tid];
val = smem[tid];
}

__syncthreads();

if (tid < quot)
{

if (0 == (THREADS_NUM & (WARP_SIZE - 1)))
{
Expand Down
6 changes: 4 additions & 2 deletions modules/cudev/include/opencv2/cudev/grid/detail/integral.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,8 @@ namespace integral_detail
__shared__ D smem[NUM_SCAN_THREADS * 2];
__shared__ D carryElem;

carryElem = 0;
if (threadIdx.x == 0)
carryElem = 0;

__syncthreads();

Expand Down Expand Up @@ -105,7 +106,8 @@ namespace integral_detail
__shared__ D smem[NUM_SCAN_THREADS * 2];
__shared__ D carryElem;

carryElem = 0;
if (threadIdx.x == 0)
carryElem = 0;

__syncthreads();

Expand Down
2 changes: 1 addition & 1 deletion modules/cudev/include/opencv2/cudev/warp/scan.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ __device__ T warpScanInclusive(T data, volatile T* smem, uint tid)
#pragma unroll
for (int i = 1; i <= (WARP_SIZE / 2); i *= 2)
{
const T val = __shfl_up(data, i, WARP_SIZE);
const T val = shfl_up(data, i);
if (laneId >= i)
data += val;
}
Expand Down
5 changes: 5 additions & 0 deletions modules/cudev/include/opencv2/cudev/warp/shuffle.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,11 @@ __device__ double shfl_up(double val, uint delta, int width = warpSize)
return __hiloint2double(hi, lo);
}

// shfl_up overload for unsigned long long values.
//
// val   - value passed as the first argument to __shfl_up
// delta - passed through unchanged as the second argument to __shfl_up
// width - passed through unchanged as the third argument; defaults to warpSize
//
// Returns whatever __shfl_up yields for these arguments, as unsigned long long.
// NOTE(review): the definition/mapping of __shfl_up (legacy intrinsic vs. a
// *_sync wrapper) is not visible from this hunk - confirm against the top of
// this header before relying on specific synchronization semantics.
__device__ __forceinline__ unsigned long long shfl_up(unsigned long long val, uint delta, int width = warpSize)
{
    const unsigned long long shifted = __shfl_up(val, delta, width);
    return shifted;
}

#define CV_CUDEV_SHFL_UP_VEC_INST(input_type) \
__device__ __forceinline__ input_type ## 1 shfl_up(const input_type ## 1 & val, uint delta, int width = warpSize) \
{ \
Expand Down
6 changes: 4 additions & 2 deletions modules/optflow/src/pcaflow.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,8 @@ void OpticalFlowPCAFlow::getSystem( OutputArray AOut, OutputArray b1Out, OutputA
Mat b2 = b2Out.getMat();

ocl::Kernel kernel( "fillDCTSampledPoints", _ocl_fillDCTSampledPointsSource );
size_t globSize[] = {features.size(), basisSize.width, basisSize.height};
CV_Assert(basisSize.width > 0 && basisSize.height > 0);
size_t globSize[] = {features.size(), (size_t)basisSize.width, (size_t)basisSize.height};
kernel
.args( cv::ocl::KernelArg::ReadOnlyNoSize( Mat( features ).getUMat( ACCESS_READ ) ),
cv::ocl::KernelArg::WriteOnlyNoSize( A ), (int)features.size(), (int)basisSize.width,
Expand Down Expand Up @@ -376,7 +377,8 @@ void OpticalFlowPCAFlow::getSystem( OutputArray A1Out, OutputArray A2Out, Output
Mat b2 = b2Out.getMat();

ocl::Kernel kernel( "fillDCTSampledPoints", _ocl_fillDCTSampledPointsSource );
size_t globSize[] = {features.size(), basisSize.width, basisSize.height};
CV_Assert(basisSize.width > 0 && basisSize.height > 0);
size_t globSize[] = {features.size(), (size_t)basisSize.width, (size_t)basisSize.height};
kernel
.args( cv::ocl::KernelArg::ReadOnlyNoSize( Mat( features ).getUMat( ACCESS_READ ) ),
cv::ocl::KernelArg::WriteOnlyNoSize( A ), (int)features.size(), (int)basisSize.width,
Expand Down
11 changes: 6 additions & 5 deletions modules/optflow/src/rlof/berlof_invoker.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1941,11 +1941,12 @@ class TrackerInvoker : public cv::ParallelLoopBody
{
if( dIptr[0] == 0 && dIptr[1] == 0)
continue;
short It[4] = {(Jptr[x] << 5) - Iptr[x],
(Jptr[x+cn]<< 5) - Iptr[x],
(Jptr1[x]<< 5) - Iptr[x],
(Jptr1[x+cn]<< 5) - Iptr[x]};

short It[4] = {
(short)((Jptr [x] << 5) - Iptr[x]),
(short)((Jptr [x+cn] << 5) - Iptr[x]),
(short)((Jptr1[x] << 5) - Iptr[x]),
(short)((Jptr1[x+cn] << 5) - Iptr[x])
};
_b1[0] += (float)(It[0]*dIptr[0]);
_b1[1] += (float)(It[1]*dIptr[0]);
_b1[2] += (float)(It[2]*dIptr[0]);
Expand Down
4 changes: 3 additions & 1 deletion modules/optflow/src/sparse_matching_gpc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,9 @@ bool ocl_getAllDCTDescriptorsForImage( const Mat *imgCh, std::vector< GPCPatchDe
const Size sz = imgCh[0].size();
ocl::Kernel kernel( "getPatchDescriptor", ocl::optflow::sparse_matching_gpc_oclsrc,
format( "-DPATCH_RADIUS_DOUBLED=%d -DCV_PI=%f -DSQRT2_INV=%f", PATCH_RADIUS_DOUBLED, CV_PI, SQRT2_INV ) );
size_t globSize[] = {sz.height - 2 * patchRadius, sz.width - 2 * patchRadius};
CV_Assert(sz.height - 2 * patchRadius > 0);
CV_Assert(sz.width - 2 * patchRadius > 0);
size_t globSize[] = {(size_t)(sz.height - 2 * patchRadius), (size_t)(sz.width - 2 * patchRadius)};
UMat out( globSize[0] * globSize[1], GPCPatchDescriptor::nFeatures, CV_64F );
if (
kernel
Expand Down
1 change: 1 addition & 0 deletions modules/sfm/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ ocv_add_module(sfm
opencv_calib3d
opencv_features2d
opencv_xfeatures2d
opencv_imgcodecs
WRAP python
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.

#include <opencv2/highgui.hpp>
#include <opencv2/imgcodecs.hpp>

#include "libmv/base/vector_utils.h"
#include "libmv/correspondence/feature.h"
Expand Down