Skip to content
Permalink
Browse files

Use OpenCL command queue

  • Loading branch information
elpaso committed Aug 8, 2018
1 parent 16a49cd commit b09df53354a442283cbb0b8be380df05a6dd49ec
@@ -4,13 +4,19 @@ __kernel void processNineCellWindow( __global float *scanLine1,
__global float *scanLine2,
__global float *scanLine3,
__global float *resultLine,
<<<<<<< 16a49cddaa18cb6d0b12335fe24c68cda183e1c0
__global float *rasterParams // mInputNodataValue, mOutputNodataValue, mZFactor, mCellSizeX, mCellSizeY
)
{
=======
__global float *rasterParams
) {
>>>>>>> Use OpenCL command queue

// Get the index of the current element
const int i = get_global_id(0);

<<<<<<< 16a49cddaa18cb6d0b12335fe24c68cda183e1c0
if ( scanLine2[i+1] == rasterParams[0] )
{
resultLine[i] = rasterParams[1];
@@ -36,5 +42,38 @@ __kernel void processNineCellWindow( __global float *scanLine1,
{
resultLine[i] = 180.0f + atan2pi( derX, derY ) * 180.0f;
}
=======
// Do the operation
//return (( (x31 - x11) + 2 * (x32 - x12) + (x33 - x13) ) / (8 * mCellSizeX))
float derX = calcFirstDer( scanLine1[i], scanLine2[i], scanLine3[i],
scanLine1[i+1], scanLine2[i+1], scanLine3[i+1],
scanLine1[i+2], scanLine2[i+2], scanLine3[i+2],
rasterParams[0], rasterParams[1], rasterParams[2], rasterParams[3]
);
//return (((x11 - x13) + 2 * (x21 - x23) + (x31 - x33)) / ( 8 * mCellSizeY));
float derY = calcFirstDer( scanLine1[i+2], scanLine1[i+1], scanLine1[i],
scanLine2[i+2], scanLine2[i+1], scanLine2[i],
scanLine3[i+2], scanLine3[i+1], scanLine3[i],
rasterParams[0], rasterParams[1], rasterParams[2], rasterParams[4]
);


if ( derX == rasterParams[1] || derY == rasterParams[1] ||
( derX == 0.0f && derY == 0.0f) )
{
resultLine[i] = rasterParams[1];
}
else
{
// 180.0 / M_PI = 57.29577951308232
float aspect = atan2( derX, derY ) * 57.29577951308232;
if ( aspect < 0 )
resultLine[i] = 90.0f - aspect;
else if (aspect > 90.0f)
// 360 + 90 = 450
resultLine[i] = 450.0f - aspect;
else
resultLine[i] = 90.0 - aspect;
>>>>>>> Use OpenCL command queue
}
}
@@ -4,13 +4,19 @@ __kernel void processNineCellWindow( __global float *scanLine1,
__global float *scanLine2,
__global float *scanLine3,
__global float *resultLine,
<<<<<<< 16a49cddaa18cb6d0b12335fe24c68cda183e1c0
__global float *rasterParams // mInputNodataValue, mOutputNodataValue, mZFactor, mCellSizeX, mCellSizeY
)
{
=======
__global float *rasterParams
) {
>>>>>>> Use OpenCL command queue

// Get the index of the current element
const int i = get_global_id(0);

<<<<<<< 16a49cddaa18cb6d0b12335fe24c68cda183e1c0
if ( scanLine2[i+1] == rasterParams[0] )
{
resultLine[i] = rasterParams[1];
@@ -38,5 +44,31 @@ __kernel void processNineCellWindow( __global float *scanLine1,
res = atanpi( res );
resultLine[i] = res * 180.0f;
}
=======
// Do the operation
//return (( (x31 - x11) + 2 * (x32 - x12) + (x33 - x13) ) / (8 * mCellSizeX))
float derX = calcFirstDer( scanLine1[i], scanLine2[i], scanLine3[i],
scanLine1[i+1], scanLine2[i+1], scanLine3[i+1],
scanLine1[i+2], scanLine2[i+2], scanLine3[i+2],
rasterParams[0], rasterParams[1], rasterParams[2], rasterParams[3]
);
//return (((x11 - x13) + 2 * (x21 - x23) + (x31 - x33)) / ( 8 * mCellSizeY));
float derY = calcFirstDer( scanLine1[i+2], scanLine1[i+1], scanLine1[i],
scanLine2[i+2], scanLine2[i+1], scanLine2[i],
scanLine3[i+2], scanLine3[i+1], scanLine3[i],
rasterParams[0], rasterParams[1], rasterParams[2], rasterParams[4]
);


if ( derX == rasterParams[1] || derY == rasterParams[1] )
{
resultLine[i] = rasterParams[1];
}
else
{
float res = sqrt( derX * derX + derY * derY );
res = atanpi( res );
resultLine[i] = res * 180.0;
>>>>>>> Use OpenCL command queue
}
}

This file was deleted.

@@ -217,10 +217,20 @@ int QgsNineCellFilter::processRasterGPU( const QString &source, QgsFeedback *fee

// Prepare context and queue
cl::Context ctx = QgsOpenClUtils::context();
<<<<<<< 16a49cddaa18cb6d0b12335fe24c68cda183e1c0
cl::CommandQueue queue( ctx );

//keep only three scanlines in memory at a time, make room for initial and final nodata
QgsOpenClUtils::CPLAllocator<float> scanLine( xSize + 2 );
=======
cl::Context::setDefault( ctx );
cl::CommandQueue queue( ctx );

//keep only three scanlines in memory at a time, make room for initial and final nodata
QgsOpenClUtils::CPLAllocator<float> scanLine1( xSize + 2 );
QgsOpenClUtils::CPLAllocator<float> scanLine2( xSize + 2 );
QgsOpenClUtils::CPLAllocator<float> scanLine3( xSize + 2 );
>>>>>>> Use OpenCL command queue
QgsOpenClUtils::CPLAllocator<float> resultLine( xSize );

// Cast to float (because double just crashes on some GPUs)
@@ -289,7 +299,18 @@ int QgsNineCellFilter::processRasterGPU( const QString &source, QgsFeedback *fee
{
QgsDebugMsg( "Raster IO Error" );
}
<<<<<<< 16a49cddaa18cb6d0b12335fe24c68cda183e1c0
queue.enqueueWriteBuffer( scanLine2Buffer, CL_TRUE, 0, bufferSize, scanLine.get() );
=======
}
else
{
//normally fetch only scanLine3 and release scanline 1 if we move forward one row
scanLine1.reset( scanLine2.release() );
scanLine2.reset( scanLine3.release() );
scanLine3.reset( xSize + 2 );
}
>>>>>>> Use OpenCL command queue

// Read scanline3: second real raster row
if ( GDALRasterIO( rasterBand, GF_Read, 0, i + 1, xSize, 1, &scanLine[1], xSize, 1, GDT_Float32, 0, 0 ) != CE_None )
@@ -320,6 +341,23 @@ int QgsNineCellFilter::processRasterGPU( const QString &source, QgsFeedback *fee
queue.enqueueWriteBuffer( *scanLineBuffer[rowIndex[2]], CL_TRUE, 0, bufferSize, scanLine.get() ); // row 0
}
}
<<<<<<< 16a49cddaa18cb6d0b12335fe24c68cda183e1c0
=======
// Set first and last extra columns to nodata
scanLine1[0] = scanLine1[xSize + 1] = mInputNodataValue;
scanLine2[0] = scanLine2[xSize + 1] = mInputNodataValue;
scanLine3[0] = scanLine3[xSize + 1] = mInputNodataValue;

// TODO: There is room for further optimization here: instead of replacing the buffers
// we could replace just the new one (the top row) and switch the order
// of buffer arguments in the kernel call.
errorCode = cl::enqueueWriteBuffer( scanLine1Buffer, CL_TRUE, 0,
sizeof( float ) * ( xSize + 2 ), scanLine1.get() );
errorCode = cl::enqueueWriteBuffer( scanLine2Buffer, CL_TRUE, 0,
sizeof( float ) * ( xSize + 2 ), scanLine2.get() );
errorCode = cl::enqueueWriteBuffer( scanLine3Buffer, CL_TRUE, 0,
sizeof( float ) * ( xSize + 2 ), scanLine3.get() );
>>>>>>> Use OpenCL command queue

kernel( cl::EnqueueArgs(
queue,

This file was deleted.

0 comments on commit b09df53

Please sign in to comment.
You can’t perform that action at this time.