diff --git a/docs/data/doxygenOutputs/effects_augmentations_erase_img150x150.png b/docs/data/doxygenOutputs/effects_augmentations_erase_img150x150.png new file mode 100644 index 000000000..136eca9bb Binary files /dev/null and b/docs/data/doxygenOutputs/effects_augmentations_erase_img150x150.png differ diff --git a/include/rppt_tensor_effects_augmentations.h b/include/rppt_tensor_effects_augmentations.h index 365c22545..708f318bf 100644 --- a/include/rppt_tensor_effects_augmentations.h +++ b/include/rppt_tensor_effects_augmentations.h @@ -509,6 +509,56 @@ RppStatus rppt_gaussian_noise_voxel_host(RppPtr_t srcPtr, RpptGenericDescPtr src RppStatus rppt_gaussian_noise_voxel_gpu(RppPtr_t srcPtr, RpptGenericDescPtr srcDescPtr, RppPtr_t dstPtr, RpptGenericDescPtr dstDescPtr, Rpp32f *meanTensor, Rpp32f *stdDevTensor, Rpp32u seed, RpptROI3DPtr roiGenericPtrSrc, RpptRoi3DType roiType, rppHandle_t rppHandle); #endif // GPU_SUPPORT +/*! \brief Erase augmentation on HOST backend for a NCHW/NHWC layout tensor + * \details This function erases one or more user defined regions from an image, for a batch of RGB(3 channel) / greyscale(1 channel) images with an NHWC/NCHW tensor layout.
+ * srcPtr depth ranges - Rpp8u (0 to 255), Rpp16f (0 to 1), Rpp32f (0 to 1), Rpp8s (-128 to 127). + * dstPtr depth ranges - Will be same depth as srcPtr. + * \image html img150x150.png Sample Input + * \image html effects_augmentations_erase_img150x150.png Sample Output + * \param [in] srcPtr source tensor in HOST memory + * \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = 1/3) + * \param [out] dstPtr destination tensor in HOST memory + * \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = same as that of srcDescPtr) + * \param [in] anchorBoxInfoTensor anchorBoxInfo values of type RpptRoiLtrb for each erase-region inside each image in the batch. Restrictions - + - 0 <= anchorBoxInfo[i] < respective image width/height + - Erase-region anchor boxes on each image given by the user must not overlap + * \param [in] colorsTensor RGB values to use for each erase-region inside each image in the batch. (colors[i] will have range equivalent of srcPtr) + * \param [in] numBoxesTensor number of erase-regions per image, for each image in the batch. (numBoxesTensor[n] >= 0) + * \param [in] roiTensorSrc ROI data in HOST memory, for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y)) + * \param [in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB) + * \param [in] rppHandle RPP HOST handle created with \ref rppCreateWithBatchSize() + * \return A \ref RppStatus enumeration. + * \retval RPP_SUCCESS Successful completion. + * \retval RPP_ERROR* Unsuccessful completion. + */ +RppStatus rppt_erase_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, RpptRoiLtrb *anchorBoxInfoTensor, RppPtr_t colorsTensor, Rpp32u *numBoxesTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, rppHandle_t rppHandle); + +#ifdef GPU_SUPPORT +/*! \brief Erase augmentation on HIP backend for a NCHW/NHWC layout tensor + * \details This function erases one or more user defined regions from an image, for a batch of RGB(3 channel) / greyscale(1 channel) images with an NHWC/NCHW tensor layout.
+ * srcPtr depth ranges - Rpp8u (0 to 255), Rpp16f (0 to 1), Rpp32f (0 to 1), Rpp8s (-128 to 127). + * dstPtr depth ranges - Will be same depth as srcPtr. + * \image html img150x150.jpg Sample Input + * \image html effects_augmentations_erase_img150x150.jpg Sample Output + * \param [in] srcPtr source tensor in HIP memory + * \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = 1/3) + * \param [out] dstPtr destination tensor in HIP memory + * \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = same as that of srcDescPtr) + * \param [in] anchorBoxInfoTensor anchorBoxInfo values of type RpptRoiLtrb for each erase-region inside each image in the batch. Restrictions - + - 0 <= anchorBoxInfo[i] < respective image width/height + - Erase-region anchor boxes on each image given by the user must not overlap + * \param [in] colorsTensor RGB values to use for each erase-region inside each image in the batch. (colors[i] will have range equivalent of srcPtr) + * \param [in] numBoxesTensor number of erase-regions per image, for each image in the batch. (numBoxesTensor[n] >= 0) + * \param [in] roiTensorSrc ROI data in HIP memory, for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y)) + * \param [in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB) + * \param [in] rppHandle RPP HIP handle created with \ref rppCreateWithStreamAndBatchSize() + * \return A \ref RppStatus enumeration. + * \retval RPP_SUCCESS Successful completion. + * \retval RPP_ERROR* Unsuccessful completion. + */ +RppStatus rppt_erase_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, RpptRoiLtrb *anchorBoxInfoTensor, RppPtr_t colorsTensor, Rpp32u *numBoxesTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, rppHandle_t rppHandle); +#endif // GPU_SUPPORT + /*! @} */ diff --git a/src/include/hip/rpp_hip_common.hpp b/src/include/hip/rpp_hip_common.hpp index 169ba8bd5..a5cd5972c 100644 --- a/src/include/hip/rpp_hip_common.hpp +++ b/src/include/hip/rpp_hip_common.hpp @@ -82,6 +82,7 @@ typedef union { uchar uc1[8]; uchar4 uc4[2]; typedef union { uchar uc1[24]; uchar4 uc4[6]; uchar3 uc3[8]; d_uchar8 uc8[3]; } d_uchar24; // schar +typedef struct { schar sc1[3]; } d_schar3_s; typedef struct { schar sc1[8]; } d_schar8_s; typedef struct { d_schar8_s sc8[3]; } d_schar24_s; diff --git a/src/modules/cpu/host_tensor_effects_augmentations.hpp b/src/modules/cpu/host_tensor_effects_augmentations.hpp index 8d50fa213..9388ed6bd 100644 --- a/src/modules/cpu/host_tensor_effects_augmentations.hpp +++ b/src/modules/cpu/host_tensor_effects_augmentations.hpp @@ -34,5 +34,6 @@ SOFTWARE. #include "kernel/water.hpp" #include "kernel/ricap.hpp" #include "kernel/vignette.hpp" +#include "kernel/erase.hpp" #endif // HOST_TENSOR_EFFECTS_AUGMENTATIONS_HPP diff --git a/src/modules/cpu/kernel/erase.hpp b/src/modules/cpu/kernel/erase.hpp new file mode 100644 index 000000000..487b0b56d --- /dev/null +++ b/src/modules/cpu/kernel/erase.hpp @@ -0,0 +1,332 @@ +/* +MIT License + +Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ + +#include "rppdefs.h" +#include "rpp_cpu_simd.hpp" +#include "rpp_cpu_common.hpp" + +template +RppStatus erase_host_tensor(T *srcPtr, + RpptDescPtr srcDescPtr, + T *dstPtr, + RpptDescPtr dstDescPtr, + RpptRoiLtrb *anchorBoxInfoTensor, + T *colorsTensor, + Rpp32u *numBoxesTensor, + RpptROIPtr roiTensorPtrSrc, + RpptRoiType roiType, + RppLayoutParams layoutParams, + rpp::Handle& handle) +{ + RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); + + omp_set_dynamic(0); +#pragma omp parallel for num_threads(numThreads) + for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) + { + RpptROI roi; + RpptROIPtr roiPtrInput = &roiTensorPtrSrc[batchCount]; + compute_roi_validation_host(roiPtrInput, &roi, &roiDefault, roiType); + + Rpp32u numBoxes = numBoxesTensor[batchCount]; + RpptRoiLtrb *anchorBoxInfo = anchorBoxInfoTensor + batchCount * numBoxes; + T *colors = colorsTensor + batchCount * numBoxes * srcDescPtr->c; + + T *srcPtrImage, *dstPtrImage; + srcPtrImage = srcPtr + batchCount * srcDescPtr->strides.nStride; + dstPtrImage = dstPtr + batchCount * dstDescPtr->strides.nStride; + + T *srcPtrChannel, *dstPtrChannel; + srcPtrChannel = srcPtrImage + (roi.xywhROI.xy.y * srcDescPtr->strides.hStride) + (roi.xywhROI.xy.x * layoutParams.bufferMultiplier); + dstPtrChannel = dstPtrImage; + T userPixel3[3]; + Rpp32u bufferLength = roi.xywhROI.roiWidth * layoutParams.bufferMultiplier * sizeof(T); + + // Erase with fused output-layout toggle (NHWC -> NCHW) + if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NCHW)) + { + T *srcPtrRow, *dstPtrRowR, *dstPtrRowG, *dstPtrRowB; + srcPtrRow = srcPtrChannel; + dstPtrRowR = dstPtrChannel; + dstPtrRowG = dstPtrRowR + dstDescPtr->strides.cStride; + dstPtrRowB = dstPtrRowG + dstDescPtr->strides.cStride; + + for(int i = 0; i < roi.xywhROI.roiHeight; i++) + { + T *srcPtrTemp, *dstPtrTempR, *dstPtrTempG, *dstPtrTempB; + srcPtrTemp = srcPtrRow; + dstPtrTempR = dstPtrRowR; + dstPtrTempG = dstPtrRowG; + dstPtrTempB = dstPtrRowB; + bool isErase = false; + Rpp32u bufferLength = 0; + T userPixelR, userPixelG, userPixelB; + + for (int j = 0; j < roi.xywhROI.roiWidth;) + { + for(int count = 0; count < numBoxes; count++) + { + Rpp32u x1 = static_cast(RPPPRANGECHECK(anchorBoxInfo[count].lt.x, roi.xywhROI.xy.x, roi.xywhROI.roiWidth)); + Rpp32u y1 = static_cast(RPPPRANGECHECK(anchorBoxInfo[count].lt.y, roi.xywhROI.xy.y, roi.xywhROI.roiHeight)); + Rpp32u x2 = static_cast(RPPPRANGECHECK(anchorBoxInfo[count].rb.x, x1, roi.xywhROI.roiWidth)); + Rpp32u y2 = static_cast(RPPPRANGECHECK(anchorBoxInfo[count].rb.y, y1, roi.xywhROI.roiHeight)); + Rpp32u countMul3 = count * 3; + userPixelR = colors[countMul3]; + userPixelG = colors[countMul3 + 1]; + userPixelB = colors[countMul3 + 2]; + if(i >= y1 && i <= y2 && j >= x1 && j <= x2) + { + isErase = true; + bufferLength = x2 - x1 + 1; + break; + } + } + if(isErase && bufferLength) + { + std::fill_n(dstPtrTempR, bufferLength, userPixelR); + std::fill_n(dstPtrTempG, bufferLength, userPixelG); + std::fill_n(dstPtrTempB, bufferLength, userPixelB); + srcPtrTemp += 3 * bufferLength; + j += bufferLength; + dstPtrTempR += bufferLength; + dstPtrTempG += bufferLength; + dstPtrTempB += bufferLength; + isErase = false; + } + else + { + *dstPtrTempR++ = srcPtrTemp[0]; + *dstPtrTempG++ = srcPtrTemp[1]; + *dstPtrTempB++ = srcPtrTemp[2]; + srcPtrTemp += 3; + j++; + } + } + + srcPtrRow += srcDescPtr->strides.hStride; + dstPtrRowR += dstDescPtr->strides.hStride; + dstPtrRowG += dstDescPtr->strides.hStride; + dstPtrRowB += dstDescPtr->strides.hStride; + } + } + + // Erase with fused output-layout toggle (NCHW -> NHWC) + else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NHWC)) + { + T *srcPtrRowR, *srcPtrRowG, *srcPtrRowB, *dstPtrRow; + srcPtrRowR = srcPtrChannel; + srcPtrRowG = srcPtrRowR + srcDescPtr->strides.cStride; + srcPtrRowB = srcPtrRowG + srcDescPtr->strides.cStride; + dstPtrRow = dstPtrChannel; + + for(int i = 0; i < roi.xywhROI.roiHeight; i++) + { + T *srcPtrTempR, *srcPtrTempG, *srcPtrTempB, *dstPtrTemp; + srcPtrTempR = srcPtrRowR; + srcPtrTempG = srcPtrRowG; + srcPtrTempB = srcPtrRowB; + dstPtrTemp = dstPtrRow; + bool isErase = false; + Rpp32u bufferLengthPerChannel = 0; + + for (int j = 0; j < roi.xywhROI.roiWidth;) + { + for(int count = 0; count < numBoxes; count++) + { + Rpp32u x1 = static_cast(RPPPRANGECHECK(anchorBoxInfo[count].lt.x, roi.xywhROI.xy.x, roi.xywhROI.roiWidth)); + Rpp32u y1 = static_cast(RPPPRANGECHECK(anchorBoxInfo[count].lt.y, roi.xywhROI.xy.y, roi.xywhROI.roiHeight)); + Rpp32u x2 = static_cast(RPPPRANGECHECK(anchorBoxInfo[count].rb.x, x1, roi.xywhROI.roiWidth)); + Rpp32u y2 = static_cast(RPPPRANGECHECK(anchorBoxInfo[count].rb.y, y1, roi.xywhROI.roiHeight)); + Rpp32u countMul3 = count * 3; + userPixel3[0] = colors[countMul3]; + userPixel3[1] = colors[countMul3 + 1]; + userPixel3[2] = colors[countMul3 + 2]; + + if(i >= y1 && i <= y2 && j >= x1 && j <= x2) + { + isErase = true; + bufferLengthPerChannel = x2 - x1 + 1; + break; + } + } + if(isErase && bufferLengthPerChannel) + { + for (int k = 0; k < bufferLengthPerChannel; k++) + { + memcpy(dstPtrTemp, userPixel3, sizeof(T) * 3); + dstPtrTemp += 3; + } + j += bufferLengthPerChannel; + srcPtrTempR += bufferLengthPerChannel; + srcPtrTempG += bufferLengthPerChannel; + srcPtrTempB += bufferLengthPerChannel; + isErase = false; + } + else + { + dstPtrTemp[0] = *srcPtrTempR++; + dstPtrTemp[1] = *srcPtrTempG++; + dstPtrTemp[2] = *srcPtrTempB++; + dstPtrTemp += 3; + j++; + } + } + + srcPtrRowR += srcDescPtr->strides.hStride; + srcPtrRowG += srcDescPtr->strides.hStride; + srcPtrRowB += srcDescPtr->strides.hStride; + dstPtrRow += dstDescPtr->strides.hStride; + } + } + + // Erase without fused output-layout toggle 3 channel(NCHW -> NCHW) + else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NCHW)) + { + // To copy ROI region in Image + for(int c = 0; c < layoutParams.channelParam; c++) + { + T *srcPtrRow, *dstPtrRow; + srcPtrRow = srcPtrChannel; + dstPtrRow = dstPtrChannel; + + for(int i = 0; i < roi.xywhROI.roiHeight; i++) + { + memcpy(dstPtrRow, srcPtrRow, bufferLength); + srcPtrRow += srcDescPtr->strides.hStride; + dstPtrRow += dstDescPtr->strides.hStride; + } + + srcPtrChannel += srcDescPtr->strides.cStride; + dstPtrChannel += dstDescPtr->strides.cStride; + } + + for(int count = 0; count < numBoxes; count++) + { + Rpp32u x1 = static_cast(RPPPRANGECHECK(anchorBoxInfo[count].lt.x, roi.xywhROI.xy.x, roi.xywhROI.roiWidth)); + Rpp32u y1 = static_cast(RPPPRANGECHECK(anchorBoxInfo[count].lt.y, roi.xywhROI.xy.y, roi.xywhROI.roiHeight)); + Rpp32u x2 = static_cast(RPPPRANGECHECK(anchorBoxInfo[count].rb.x, x1, roi.xywhROI.roiWidth)); + Rpp32u y2 = static_cast(RPPPRANGECHECK(anchorBoxInfo[count].rb.y, y1, roi.xywhROI.roiHeight)); + + Rpp32u pixelLocation = (y1 * srcDescPtr->strides.hStride) + (x1 * srcDescPtr->strides.wStride); + Rpp32u boxHeight = y2 - y1 + 1; + Rpp32u boxWidth = x2 - x1 + 1; + + T *dstPtrTempR, *dstPtrTempG, *dstPtrTempB; + dstPtrTempR = dstPtrImage + pixelLocation; + dstPtrTempG = dstPtrTempR + dstDescPtr->strides.cStride; + dstPtrTempB = dstPtrTempG + dstDescPtr->strides.cStride; + Rpp32u countMul3 = count * 3; + T userPixelR = colors[countMul3]; + T userPixelG = colors[countMul3 + 1]; + T userPixelB = colors[countMul3 + 2]; + for (int i = 0; i < boxHeight; i++) + { + std::fill_n(dstPtrTempR, boxWidth, userPixelR); + std::fill_n(dstPtrTempG, boxWidth, userPixelG); + std::fill_n(dstPtrTempB, boxWidth, userPixelB); + dstPtrTempR += dstDescPtr->strides.hStride; + dstPtrTempG += dstDescPtr->strides.hStride; + dstPtrTempB += dstDescPtr->strides.hStride; + } + } + } + // Erase without fused output-layout toggle 1 channel(NCHW -> NCHW) + else if ((srcDescPtr->c == 1) && (srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NCHW)) + { + // To copy ROI region in Image + for(int i = 0; i < roi.xywhROI.roiHeight; i++) + { + memcpy(dstPtrChannel, srcPtrChannel, bufferLength); + srcPtrChannel += srcDescPtr->strides.hStride; + dstPtrChannel += dstDescPtr->strides.hStride; + } + + for (int count = 0; count < numBoxes; count++) + { + Rpp32u x1 = (Rpp32u)RPPPRANGECHECK(anchorBoxInfo[count].lt.x, roi.xywhROI.xy.x, roi.xywhROI.roiWidth); + Rpp32u y1 = (Rpp32u)RPPPRANGECHECK(anchorBoxInfo[count].lt.y, roi.xywhROI.xy.y, roi.xywhROI.roiHeight); + Rpp32u x2 = (Rpp32u)RPPPRANGECHECK(anchorBoxInfo[count].rb.x, x1, roi.xywhROI.roiWidth); + Rpp32u y2 = (Rpp32u)RPPPRANGECHECK(anchorBoxInfo[count].rb.y, y1, roi.xywhROI.roiHeight); + + Rpp32u pixelLocation = (y1 * srcDescPtr->strides.hStride) + (x1 * srcDescPtr->strides.wStride); + Rpp32u boxHeight = y2 - y1 + 1; + Rpp32u boxWidth = x2 - x1 + 1; + + T *dstPtrTemp; + dstPtrTemp = dstPtrImage + pixelLocation; + + T userPixel = colors[count]; + for (int i = 0; i < boxHeight; i++) + { + std::fill_n(dstPtrTemp, boxWidth, userPixel); + dstPtrTemp += dstDescPtr->strides.hStride; + } + } + } + + // Erase without fused output-layout toggle 3 channel(NHWC -> NHWC) + else + { + // To copy ROI region in Image + for(int i = 0; i < roi.xywhROI.roiHeight; i++) + { + memcpy(dstPtrChannel, srcPtrChannel, bufferLength); + srcPtrChannel += srcDescPtr->strides.hStride; + dstPtrChannel += dstDescPtr->strides.hStride; + } + + for(int count = 0; count < numBoxes; count++) + { + Rpp32u x1 = static_cast(RPPPRANGECHECK(anchorBoxInfo[count].lt.x, roi.xywhROI.xy.x, roi.xywhROI.roiWidth)); + Rpp32u y1 = static_cast(RPPPRANGECHECK(anchorBoxInfo[count].lt.y, roi.xywhROI.xy.y, roi.xywhROI.roiHeight)); + Rpp32u x2 = static_cast(RPPPRANGECHECK(anchorBoxInfo[count].rb.x, x1, roi.xywhROI.roiWidth)); + Rpp32u y2 = static_cast(RPPPRANGECHECK(anchorBoxInfo[count].rb.y, y1, roi.xywhROI.roiHeight)); + Rpp32u countMul3 = count * 3; + userPixel3[0] = colors[countMul3]; + userPixel3[1] = colors[countMul3 + 1]; + userPixel3[2] = colors[countMul3 + 2]; + + Rpp32u pixelLocation = (y1 * srcDescPtr->strides.hStride) + (x1 * srcDescPtr->strides.wStride); + Rpp32u boxHeight = y2 - y1 + 1; + Rpp32u boxWidth = x2 - x1 + 1; + T *dstPtrTemp; + dstPtrTemp = dstPtrImage + pixelLocation; + + for (int i = 0; i < boxHeight; i++) + { + T *dstPtrRow = dstPtrTemp; + for (int j = 0; j < boxWidth; j++) + { + memcpy(dstPtrRow, userPixel3, sizeof(T) * 3); + dstPtrRow += 3; + } + dstPtrTemp += dstDescPtr->strides.hStride; + } + } + } + } + + return RPP_SUCCESS; +} diff --git a/src/modules/hip/hip_tensor_effects_augmentations.hpp b/src/modules/hip/hip_tensor_effects_augmentations.hpp index 25cd9a863..abdfd30ab 100644 --- a/src/modules/hip/hip_tensor_effects_augmentations.hpp +++ b/src/modules/hip/hip_tensor_effects_augmentations.hpp @@ -34,5 +34,6 @@ SOFTWARE. #include "kernel/water.hpp" #include "kernel/ricap.hpp" #include "kernel/vignette.hpp" +#include "kernel/erase.hpp" #endif // HIP_TENSOR_EFFECTS_AUGMENTATIONS_HPP diff --git a/src/modules/hip/kernel/erase.hpp b/src/modules/hip/kernel/erase.hpp new file mode 100644 index 000000000..2591b53f0 --- /dev/null +++ b/src/modules/hip/kernel/erase.hpp @@ -0,0 +1,278 @@ +#include +#include "rpp_hip_common.hpp" + +// -------------------- Set 0 - Erase main kernels -------------------- +template +__global__ void erase_pkd_hip_tensor(T *dstPtr, + uint2 dstStridesNH, + RpptRoiLtrb *anchorBoxInfoTensor, + U *colorsTensor, + Rpp32u *numBoxesTensor, + RpptROIPtr roiTensorPtrSrc) +{ + int id_x = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x; + int id_y = hipBlockIdx_y * hipBlockDim_y + hipThreadIdx_y; + int id_z = hipBlockIdx_z * hipBlockDim_z + hipThreadIdx_z; + + if ((id_y >= roiTensorPtrSrc[id_z].xywhROI.roiHeight) || (id_x >= roiTensorPtrSrc[id_z].xywhROI.roiWidth)) + return; + + Rpp32u numBoxes = numBoxesTensor[id_z]; + uint dstIdx = (id_z * dstStridesNH.x) + (id_y * dstStridesNH.y) + id_x * 3; + + // check if the co-ordinates is within any user defined box + for (int i = 0; i < numBoxes; i++) + { + int temp = (id_z * numBoxes) + i; + if (id_x >= anchorBoxInfoTensor[temp].lt.x && id_x <= anchorBoxInfoTensor[temp].rb.x && id_y >= anchorBoxInfoTensor[temp].lt.y && id_y <= anchorBoxInfoTensor[temp].rb.y) + { + *reinterpret_cast(dstPtr + dstIdx) = static_cast(colorsTensor[temp]); + break; + } + } +} + +template +__global__ void erase_pln_hip_tensor(T *dstPtr, + uint3 dstStridesNCH, + RpptRoiLtrb *anchorBoxInfoTensor, + T *colorsTensor, + Rpp32u *numBoxesTensor, + RpptROIPtr roiTensorPtrSrc) +{ + int id_x = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x; + int id_y = hipBlockIdx_y * hipBlockDim_y + hipThreadIdx_y; + int id_z = hipBlockIdx_z * hipBlockDim_z + hipThreadIdx_z; + + if ((id_y >= roiTensorPtrSrc[id_z].xywhROI.roiHeight) || (id_x >= roiTensorPtrSrc[id_z].xywhROI.roiWidth)) + return; + + Rpp32u numBoxes = numBoxesTensor[id_z]; + uint dstIdx = (id_z * dstStridesNCH.x) + (id_y * dstStridesNCH.z) + id_x; + + // check if the co-ordinates is within any user defined box + for (int i = 0; i < numBoxes; i++) + { + int temp = (id_z * numBoxes) + i; + if (id_x >= anchorBoxInfoTensor[temp].lt.x && id_x <= anchorBoxInfoTensor[temp].rb.x && id_y >= anchorBoxInfoTensor[temp].lt.y && id_y <= anchorBoxInfoTensor[temp].rb.y) + { + *static_cast((dstPtr + dstIdx)) = colorsTensor[temp]; + break; + } + } +} + +template +__global__ void erase_pln3_hip_tensor(T *dstPtr, + uint3 dstStridesNCH, + RpptRoiLtrb *anchorBoxInfoTensor, + T *colorsTensor, + Rpp32u *numBoxesTensor, + RpptROIPtr roiTensorPtrSrc) +{ + int id_x = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x; + int id_y = hipBlockIdx_y * hipBlockDim_y + hipThreadIdx_y; + int id_z = hipBlockIdx_z * hipBlockDim_z + hipThreadIdx_z; + + if ((id_y >= roiTensorPtrSrc[id_z].xywhROI.roiHeight) || (id_x >= roiTensorPtrSrc[id_z].xywhROI.roiWidth)) + return; + + Rpp32u numBoxes = numBoxesTensor[id_z]; + uint dstIdx = (id_z * dstStridesNCH.x) + (id_y * dstStridesNCH.z) + id_x; + + // check if the co-ordinates is within any user defined box + for (int i = 0; i < numBoxes; i++) + { + int temp = (id_z * numBoxes) + i; + if (id_x >= anchorBoxInfoTensor[temp].lt.x && id_x <= anchorBoxInfoTensor[temp].rb.x && id_y >= anchorBoxInfoTensor[temp].lt.y && id_y <= anchorBoxInfoTensor[temp].rb.y) + { + temp *= 3; + *static_cast(dstPtr + dstIdx) = colorsTensor[temp]; + dstIdx += dstStridesNCH.y; + *static_cast(dstPtr + dstIdx) = colorsTensor[temp + 1]; + dstIdx += dstStridesNCH.y; + *static_cast(dstPtr + dstIdx) = colorsTensor[temp + 2]; + break; + } + } +} + +// -------------------- Set 1 - Kernel Executors -------------------- +template +RppStatus hip_exec_erase_tensor(T *srcPtr, + RpptDescPtr srcDescPtr, + T *dstPtr, + RpptDescPtr dstDescPtr, + RpptRoiLtrb *anchorBoxInfoTensor, + U *colorsTensor, + Rpp32u *numBoxesTensor, + RpptROIPtr roiTensorPtrSrc, + RpptRoiType roiType, + rpp::Handle& handle) +{ + if (roiType == RpptRoiType::LTRB) + hip_exec_roi_converison_ltrb_to_xywh(roiTensorPtrSrc, handle); + + int globalThreads_x = dstDescPtr->w; + int globalThreads_y = dstDescPtr->h; + int globalThreads_z = handle.GetBatchSize(); + + if ((srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NHWC)) + { + if (srcDescPtr->dataType == RpptDataType::U8) + { + hipMemcpyAsync(dstPtr, srcPtr, static_cast(srcDescPtr->n * srcDescPtr->strides.nStride * sizeof(Rpp8u)), hipMemcpyDeviceToDevice, handle.GetStream()); + hipStreamSynchronize(handle.GetStream()); + hipLaunchKernelGGL(erase_pkd_hip_tensor, + dim3(ceil((float)globalThreads_x / LOCAL_THREADS_X), ceil((float)globalThreads_y / LOCAL_THREADS_Y), ceil((float)globalThreads_z / LOCAL_THREADS_Z)), + dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z), + 0, + handle.GetStream(), + dstPtr, + make_uint2(dstDescPtr->strides.nStride, dstDescPtr->strides.hStride), + anchorBoxInfoTensor, + reinterpret_cast(colorsTensor), + numBoxesTensor, + roiTensorPtrSrc); + } + else if (srcDescPtr->dataType == RpptDataType::F16) + { + hipMemcpyAsync(dstPtr, srcPtr, static_cast(srcDescPtr->n * srcDescPtr->strides.nStride * sizeof(Rpp16f)), hipMemcpyDeviceToDevice, handle.GetStream()); + hipStreamSynchronize(handle.GetStream()); + hipLaunchKernelGGL(erase_pkd_hip_tensor, + dim3(ceil((float)globalThreads_x / LOCAL_THREADS_X), ceil((float)globalThreads_y / LOCAL_THREADS_Y), ceil((float)globalThreads_z / LOCAL_THREADS_Z)), + dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z), + 0, + handle.GetStream(), + dstPtr, + make_uint2(dstDescPtr->strides.nStride, dstDescPtr->strides.hStride), + anchorBoxInfoTensor, + reinterpret_cast(colorsTensor), + numBoxesTensor, + roiTensorPtrSrc); + } + else if (srcDescPtr->dataType == RpptDataType::F32) + { + hipMemcpyAsync(dstPtr, srcPtr, static_cast(srcDescPtr->n * srcDescPtr->strides.nStride * sizeof(Rpp32f)), hipMemcpyDeviceToDevice, handle.GetStream()); + hipStreamSynchronize(handle.GetStream()); + hipLaunchKernelGGL(erase_pkd_hip_tensor, + dim3(ceil((float)globalThreads_x / LOCAL_THREADS_X), ceil((float)globalThreads_y / LOCAL_THREADS_Y), ceil((float)globalThreads_z / LOCAL_THREADS_Z)), + dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z), + 0, + handle.GetStream(), + dstPtr, + make_uint2(dstDescPtr->strides.nStride, dstDescPtr->strides.hStride), + anchorBoxInfoTensor, + reinterpret_cast(colorsTensor), + numBoxesTensor, + roiTensorPtrSrc); + } + else if (srcDescPtr->dataType == RpptDataType::I8) + { + hipMemcpyAsync(dstPtr, srcPtr, static_cast(srcDescPtr->n * srcDescPtr->strides.nStride * sizeof(Rpp8s)), hipMemcpyDeviceToDevice, handle.GetStream()); + hipStreamSynchronize(handle.GetStream()); + hipLaunchKernelGGL(erase_pkd_hip_tensor, + dim3(ceil((float)globalThreads_x / LOCAL_THREADS_X), ceil((float)globalThreads_y / LOCAL_THREADS_Y), ceil((float)globalThreads_z / LOCAL_THREADS_Z)), + dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z), + 0, + handle.GetStream(), + dstPtr, + make_uint2(dstDescPtr->strides.nStride, dstDescPtr->strides.hStride), + anchorBoxInfoTensor, + reinterpret_cast(colorsTensor), + numBoxesTensor, + roiTensorPtrSrc); + } + } + else if ((srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NCHW) && dstDescPtr->c == 1) + { + hipMemcpyAsync(dstPtr, srcPtr, static_cast(srcDescPtr->n * srcDescPtr->strides.nStride * sizeof(T)), hipMemcpyDeviceToDevice, handle.GetStream()); + hipStreamSynchronize(handle.GetStream()); + hipLaunchKernelGGL(erase_pln_hip_tensor, + dim3(ceil((float)globalThreads_x/LOCAL_THREADS_X), ceil((float)globalThreads_y/LOCAL_THREADS_Y), ceil((float)globalThreads_z/LOCAL_THREADS_Z)), + dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z), + 0, + handle.GetStream(), + dstPtr, + make_uint3(dstDescPtr->strides.nStride, dstDescPtr->strides.cStride, dstDescPtr->strides.hStride), + anchorBoxInfoTensor, + colorsTensor, + numBoxesTensor, + roiTensorPtrSrc); + } + else if ((srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NCHW) && dstDescPtr->c == 3) + { + hipMemcpyAsync(dstPtr, srcPtr, static_cast(srcDescPtr->n * srcDescPtr->strides.nStride * sizeof(T)), hipMemcpyDeviceToDevice, handle.GetStream()); + hipStreamSynchronize(handle.GetStream()); + hipLaunchKernelGGL(erase_pln3_hip_tensor, + dim3(ceil((float)globalThreads_x/LOCAL_THREADS_X), ceil((float)globalThreads_y/LOCAL_THREADS_Y), ceil((float)globalThreads_z/LOCAL_THREADS_Z)), + dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z), + 0, + handle.GetStream(), + dstPtr, + make_uint3(dstDescPtr->strides.nStride, dstDescPtr->strides.cStride, dstDescPtr->strides.hStride), + anchorBoxInfoTensor, + colorsTensor, + numBoxesTensor, + roiTensorPtrSrc); + } + else if ((srcDescPtr->c == 3) && (dstDescPtr->c == 3)) + { + if ((srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NCHW)) + { + globalThreads_x = (dstDescPtr->w + 7) >> 3; + hipLaunchKernelGGL(convert_pkd3_pln3_hip_tensor, + dim3(ceil((float)globalThreads_x/LOCAL_THREADS_X), ceil((float)globalThreads_y/LOCAL_THREADS_Y), ceil((float)globalThreads_z/LOCAL_THREADS_Z)), + dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z), + 0, + handle.GetStream(), + srcPtr, + make_uint2(srcDescPtr->strides.nStride, srcDescPtr->strides.hStride), + dstPtr, + make_uint3(dstDescPtr->strides.nStride, dstDescPtr->strides.cStride, dstDescPtr->strides.hStride), + roiTensorPtrSrc); + hipStreamSynchronize(handle.GetStream()); + globalThreads_x = dstDescPtr->w; + hipLaunchKernelGGL(erase_pln3_hip_tensor, + dim3(ceil((float)globalThreads_x/LOCAL_THREADS_X), ceil((float)globalThreads_y/LOCAL_THREADS_Y), ceil((float)globalThreads_z/LOCAL_THREADS_Z)), + dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z), + 0, + handle.GetStream(), + dstPtr, + make_uint3(dstDescPtr->strides.nStride, dstDescPtr->strides.cStride, dstDescPtr->strides.hStride), + anchorBoxInfoTensor, + colorsTensor, + numBoxesTensor, + roiTensorPtrSrc); + } + else if ((srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NHWC)) + { + globalThreads_x = (dstDescPtr->w + 7) >> 3; + hipLaunchKernelGGL(convert_pln3_pkd3_hip_tensor, + dim3(ceil((float)globalThreads_x/LOCAL_THREADS_X), ceil((float)globalThreads_y/LOCAL_THREADS_Y), ceil((float)globalThreads_z/LOCAL_THREADS_Z)), + dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z), + 0, + handle.GetStream(), + srcPtr, + make_uint3(srcDescPtr->strides.nStride, srcDescPtr->strides.cStride, srcDescPtr->strides.hStride), + dstPtr, + make_uint2(dstDescPtr->strides.nStride, dstDescPtr->strides.hStride), + roiTensorPtrSrc); + hipStreamSynchronize(handle.GetStream()); + globalThreads_x = dstDescPtr->w; + hipLaunchKernelGGL(erase_pkd_hip_tensor, + dim3(ceil((float)globalThreads_x/LOCAL_THREADS_X), ceil((float)globalThreads_y/LOCAL_THREADS_Y), ceil((float)globalThreads_z/LOCAL_THREADS_Z)), + dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z), + 0, + handle.GetStream(), + dstPtr, + make_uint2(dstDescPtr->strides.nStride, dstDescPtr->strides.hStride), + anchorBoxInfoTensor, + colorsTensor, + numBoxesTensor, + roiTensorPtrSrc); + } + } + + return RPP_SUCCESS; +} diff --git a/src/modules/rppt_tensor_effects_augmentations.cpp b/src/modules/rppt_tensor_effects_augmentations.cpp index 24cc8e032..8829a4ee0 100644 --- a/src/modules/rppt_tensor_effects_augmentations.cpp +++ b/src/modules/rppt_tensor_effects_augmentations.cpp @@ -721,6 +721,80 @@ RppStatus rppt_vignette_host(RppPtr_t srcPtr, } +/******************** erase ********************/ + +RppStatus rppt_erase_host(RppPtr_t srcPtr, + RpptDescPtr srcDescPtr, + RppPtr_t dstPtr, + RpptDescPtr dstDescPtr, + RpptRoiLtrb *anchorBoxInfoTensor, + RppPtr_t colorsTensor, + Rpp32u *numBoxesTensor, + RpptROIPtr roiTensorPtrSrc, + RpptRoiType roiType, + rppHandle_t rppHandle) +{ + RppLayoutParams layoutParams = get_layout_params(srcDescPtr->layout, srcDescPtr->c); + if ((srcDescPtr->dataType == RpptDataType::U8) && (dstDescPtr->dataType == RpptDataType::U8)) + { + erase_host_tensor(static_cast(srcPtr) + srcDescPtr->offsetInBytes, + srcDescPtr, + static_cast(dstPtr) + dstDescPtr->offsetInBytes, + dstDescPtr, + anchorBoxInfoTensor, + static_cast(colorsTensor), + numBoxesTensor, + roiTensorPtrSrc, + roiType, + layoutParams, + rpp::deref(rppHandle)); + } + else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) + { + erase_host_tensor(reinterpret_cast(static_cast(srcPtr) + srcDescPtr->offsetInBytes), + srcDescPtr, + reinterpret_cast(static_cast(dstPtr) + srcDescPtr->offsetInBytes), + dstDescPtr, + anchorBoxInfoTensor, + static_cast(colorsTensor), + numBoxesTensor, + roiTensorPtrSrc, + roiType, + layoutParams, + rpp::deref(rppHandle)); + } + else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) + { + erase_host_tensor(reinterpret_cast(static_cast(srcPtr) + srcDescPtr->offsetInBytes), + srcDescPtr, + reinterpret_cast(static_cast(dstPtr) + dstDescPtr->offsetInBytes), + dstDescPtr, + anchorBoxInfoTensor, + static_cast(colorsTensor), + numBoxesTensor, + roiTensorPtrSrc, + roiType, + layoutParams, + rpp::deref(rppHandle)); + } + else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) + { + erase_host_tensor(static_cast(srcPtr) + srcDescPtr->offsetInBytes, + srcDescPtr, + static_cast(dstPtr) + dstDescPtr->offsetInBytes, + dstDescPtr, + anchorBoxInfoTensor, + static_cast(colorsTensor), + numBoxesTensor, + roiTensorPtrSrc, + roiType, + layoutParams, + rpp::deref(rppHandle)); + } + + return RPP_SUCCESS; +} + /******************** ricap ********************/ RppStatus rppt_ricap_host(RppPtr_t srcPtr, @@ -1575,4 +1649,76 @@ RppStatus rppt_vignette_gpu(RppPtr_t srcPtr, #endif // backend } +RppStatus rppt_erase_gpu(RppPtr_t srcPtr, + RpptDescPtr srcDescPtr, + RppPtr_t dstPtr, + RpptDescPtr dstDescPtr, + RpptRoiLtrb *anchorBoxInfoTensor, + RppPtr_t colorsTensor, + Rpp32u *numBoxesTensor, + RpptROIPtr roiTensorPtrSrc, + RpptRoiType roiType, + rppHandle_t rppHandle) +{ +#ifdef HIP_COMPILE + + if ((srcDescPtr->dataType == RpptDataType::U8) && (dstDescPtr->dataType == RpptDataType::U8)) + { + hip_exec_erase_tensor(static_cast(srcPtr) + srcDescPtr->offsetInBytes, + srcDescPtr, + static_cast(dstPtr) + dstDescPtr->offsetInBytes, + dstDescPtr, + anchorBoxInfoTensor, + static_cast(colorsTensor), + numBoxesTensor, + roiTensorPtrSrc, + roiType, + rpp::deref(rppHandle)); + } + else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) + { + hip_exec_erase_tensor(reinterpret_cast(static_cast(srcPtr) + srcDescPtr->offsetInBytes), + srcDescPtr, + reinterpret_cast(static_cast(dstPtr) + dstDescPtr->offsetInBytes), + dstDescPtr, + anchorBoxInfoTensor, + static_cast(colorsTensor), + numBoxesTensor, + roiTensorPtrSrc, + roiType, + rpp::deref(rppHandle)); + } + else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) + { + hip_exec_erase_tensor(reinterpret_cast(static_cast(srcPtr) + srcDescPtr->offsetInBytes), + srcDescPtr, + reinterpret_cast(static_cast(dstPtr) + dstDescPtr->offsetInBytes), + dstDescPtr, + anchorBoxInfoTensor, + static_cast(colorsTensor), + numBoxesTensor, + roiTensorPtrSrc, + roiType, + rpp::deref(rppHandle)); + } + else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) + { + hip_exec_erase_tensor(static_cast(srcPtr) + srcDescPtr->offsetInBytes, + srcDescPtr, + static_cast(dstPtr) + dstDescPtr->offsetInBytes, + dstDescPtr, + anchorBoxInfoTensor, + static_cast(colorsTensor), + numBoxesTensor, + roiTensorPtrSrc, + roiType, + rpp::deref(rppHandle)); + } + + return RPP_SUCCESS; +#elif defined(OCL_COMPILE) + return RPP_ERROR_NOT_IMPLEMENTED; +#endif // backend +} + #endif // GPU_SUPPORT diff --git a/utilities/test_suite/HIP/Tensor_hip.cpp b/utilities/test_suite/HIP/Tensor_hip.cpp index bb357c030..604231e3c 100644 --- a/utilities/test_suite/HIP/Tensor_hip.cpp +++ b/utilities/test_suite/HIP/Tensor_hip.cpp @@ -356,6 +356,18 @@ int main(int argc, char **argv) if(testCase == 82) CHECK_RETURN_STATUS(hipHostMalloc(&roiPtrInputCropRegion, 4 * sizeof(RpptROI))); + Rpp32u boxesInEachImage = 3; + Rpp32f *colorBuffer; + RpptRoiLtrb *anchorBoxInfoTensor; + Rpp32u *numOfBoxes; + if(testCase == 32) + { + CHECK_RETURN_STATUS(hipHostMalloc(&colorBuffer, batchSize * boxesInEachImage * sizeof(Rpp32f))); + CHECK_RETURN_STATUS(hipMemset(colorBuffer, 0, batchSize * boxesInEachImage * sizeof(Rpp32f))); + CHECK_RETURN_STATUS(hipHostMalloc(&anchorBoxInfoTensor, batchSize * boxesInEachImage * sizeof(RpptRoiLtrb))); + CHECK_RETURN_STATUS(hipHostMalloc(&numOfBoxes, batchSize * sizeof(Rpp32u))); + } + // create cropRoi and patchRoi in case of crop_and_patch RpptROI *cropRoi, *patchRoi; if(testCase == 33) @@ -736,6 +748,19 @@ int main(int argc, char **argv) break; } + case 32: + { + testCaseName = "erase"; + + init_erase(batchSize, boxesInEachImage, numOfBoxes, anchorBoxInfoTensor, roiTensorPtrSrc, srcDescPtr->c, colorBuffer, inputBitDepth); + startWallTime = omp_get_wtime(); + if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) + rppt_erase_gpu(d_input, srcDescPtr, d_output, dstDescPtr, anchorBoxInfoTensor, colorBuffer, numOfBoxes, roiTensorPtrSrc, roiTypeSrc, handle); + else + missingFuncFlag = 1; + + break; + } case 33: { testCaseName = "crop_and_patch"; @@ -1433,6 +1458,12 @@ int main(int argc, char **argv) } if (reductionTypeCase) CHECK_RETURN_STATUS(hipHostFree(reductionFuncResultArr)); + if(testCase == 32) + { + CHECK_RETURN_STATUS(hipHostFree(colorBuffer)); + CHECK_RETURN_STATUS(hipHostFree(anchorBoxInfoTensor)); + CHECK_RETURN_STATUS(hipHostFree(numOfBoxes)); + } if(anchorTensor != NULL) CHECK_RETURN_STATUS(hipHostFree(anchorTensor)); if(shapeTensor != NULL) diff --git a/utilities/test_suite/HIP/runTests.py b/utilities/test_suite/HIP/runTests.py index d32d20e2b..f62103091 100644 --- a/utilities/test_suite/HIP/runTests.py +++ b/utilities/test_suite/HIP/runTests.py @@ -296,7 +296,7 @@ def rpp_test_suite_parser_and_validator(): subprocess.run(["make", "-j16"], cwd=".") # nosec # List of cases supported -supportedCaseList = ['0', '1', '2', '4', '8', '13', '20', '21', '23', '29', '30', '31', '33', '34', '36', '37', '38', '39', '45', '46', '54', '61', '63', '65', '68', '70', '80', '82', '83', '84', '85', '86', '87', '88', '89', '90'] +supportedCaseList = ['0', '1', '2', '4', '8', '13', '20', '21', '23', '29', '30', '31', '32', '33', '34', '36', '37', '38', '39', '45', '46', '54', '61', '63', '65', '68', '70', '80', '82', '83', '84', '85', '86', '87', '88', '89', '90'] # Create folders based on testType and profilingOption if testType == 1 and profilingOption == "YES": diff --git a/utilities/test_suite/HOST/Tensor_host.cpp b/utilities/test_suite/HOST/Tensor_host.cpp index 051d4045a..f85ea5704 100644 --- a/utilities/test_suite/HOST/Tensor_host.cpp +++ b/utilities/test_suite/HOST/Tensor_host.cpp @@ -740,6 +740,47 @@ int main(int argc, char **argv) break; } + case 32: + { + testCaseName = "erase"; + Rpp32u boxesInEachImage = 3; + Rpp32f colorBuffer[batchSize * boxesInEachImage]; + RpptRoiLtrb anchorBoxInfoTensor[batchSize * boxesInEachImage]; + Rpp32u numOfBoxes[batchSize]; + int idx; + + init_erase(batchSize, boxesInEachImage, numOfBoxes, anchorBoxInfoTensor, roiTensorPtrSrc, srcDescPtr->c, colorBuffer, inputBitDepth); + + startWallTime = omp_get_wtime(); + startCpuTime = clock(); + if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) + rppt_erase_host(input, srcDescPtr, output, dstDescPtr, anchorBoxInfoTensor, colorBuffer, numOfBoxes, roiTensorPtrSrc, roiTypeSrc, handle); + else + missingFuncFlag = 1; + + break; + } + case 33: + { + testCaseName = "crop_and_patch"; + + for (i = 0; i < batchSize; i++) + { + cropRoi[i].xywhROI.xy.x = patchRoi[i].xywhROI.xy.x = roiList[0]; + cropRoi[i].xywhROI.xy.y = patchRoi[i].xywhROI.xy.y = roiList[1]; + cropRoi[i].xywhROI.roiWidth = patchRoi[i].xywhROI.roiWidth = roiWidthList[i]; + cropRoi[i].xywhROI.roiHeight = patchRoi[i].xywhROI.roiHeight = roiHeightList[i]; + } + + startWallTime = omp_get_wtime(); + startCpuTime = clock(); + if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) + rppt_crop_and_patch_host(input, input_second, srcDescPtr, output, dstDescPtr, roiTensorPtrDst, cropRoi, patchRoi, roiTypeSrc, handle); + else + missingFuncFlag = 1; + + break; + } case 34: { testCaseName = "lut"; @@ -803,27 +844,6 @@ int main(int argc, char **argv) break; } - case 33: - { - testCaseName = "crop_and_patch"; - - for (i = 0; i < batchSize; i++) - { - cropRoi[i].xywhROI.xy.x = patchRoi[i].xywhROI.xy.x = roiList[0]; - cropRoi[i].xywhROI.xy.y = patchRoi[i].xywhROI.xy.y = roiList[1]; - cropRoi[i].xywhROI.roiWidth = patchRoi[i].xywhROI.roiWidth = roiWidthList[i]; - cropRoi[i].xywhROI.roiHeight = patchRoi[i].xywhROI.roiHeight = roiHeightList[i]; - } - - startWallTime = omp_get_wtime(); - startCpuTime = clock(); - if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) - rppt_crop_and_patch_host(input, input_second, srcDescPtr, output, dstDescPtr, roiTensorPtrDst, cropRoi, patchRoi, roiTypeSrc, handle); - else - missingFuncFlag = 1; - - break; - } case 37: { testCaseName = "crop"; diff --git a/utilities/test_suite/HOST/runTests.py b/utilities/test_suite/HOST/runTests.py index 6f8eb7f70..f83f981fa 100644 --- a/utilities/test_suite/HOST/runTests.py +++ b/utilities/test_suite/HOST/runTests.py @@ -272,7 +272,7 @@ def rpp_test_suite_parser_and_validator(): subprocess.run(["make", "-j16"], cwd=".") # nosec # List of cases supported -supportedCaseList = ['0', '1', '2', '4', '8', '13', '20', '21', '23', '29', '30', '31', '33', '34', '36', '37', '38', '39', '45', '46', '54', '61', '63', '65', '68', '70', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90'] +supportedCaseList = ['0', '1', '2', '4', '8', '13', '20', '21', '23', '29', '30', '31', '32', '33', '34', '36', '37', '38', '39', '45', '46', '54', '61', '63', '65', '68', '70', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90'] print("\n\n\n\n\n") print("##########################################################################################") diff --git a/utilities/test_suite/REFERENCE_OUTPUT/erase/erase_u8_Tensor.bin b/utilities/test_suite/REFERENCE_OUTPUT/erase/erase_u8_Tensor.bin new file mode 100644 index 000000000..b3f0894ee Binary files /dev/null and b/utilities/test_suite/REFERENCE_OUTPUT/erase/erase_u8_Tensor.bin differ diff --git a/utilities/test_suite/rpp_test_suite_common.h b/utilities/test_suite/rpp_test_suite_common.h index 5623c1b7b..5e1528c81 100644 --- a/utilities/test_suite/rpp_test_suite_common.h +++ b/utilities/test_suite/rpp_test_suite_common.h @@ -81,6 +81,7 @@ std::map augmentationMap = {29, "water"}, {30, "non_linear_blend"}, {31, "color_cast"}, + {32, "erase"}, {33, "crop_and_patch"}, {34, "lut"}, {36, "color_twist"}, @@ -1396,3 +1397,77 @@ void init_slice(RpptGenericDescPtr descriptorPtr3D, RpptROIPtr roiPtrSrc, Rpp32u } } } + +// Erase Region initializer for unit and performance testing +void inline init_erase(int batchSize, int boxesInEachImage, Rpp32u* numOfBoxes, RpptRoiLtrb* anchorBoxInfoTensor, RpptROIPtr roiTensorPtrSrc, int channels, Rpp32f *colorBuffer, int inputBitDepth) +{ + Rpp8u *colors8u = reinterpret_cast(colorBuffer); + Rpp16f *colors16f = reinterpret_cast(colorBuffer); + Rpp32f *colors32f = colorBuffer; + Rpp8s *colors8s = reinterpret_cast(colorBuffer); + for(int i = 0; i < batchSize; i++) + { + numOfBoxes[i] = boxesInEachImage; + int idx = boxesInEachImage * i; + + anchorBoxInfoTensor[idx].lt.x = 0.125 * roiTensorPtrSrc[i].xywhROI.roiWidth; + anchorBoxInfoTensor[idx].lt.y = 0.125 * roiTensorPtrSrc[i].xywhROI.roiHeight; + anchorBoxInfoTensor[idx].rb.x = 0.375 * roiTensorPtrSrc[i].xywhROI.roiWidth; + anchorBoxInfoTensor[idx].rb.y = 0.375 * roiTensorPtrSrc[i].xywhROI.roiHeight; + + idx++; + anchorBoxInfoTensor[idx].lt.x = 0.125 * roiTensorPtrSrc[i].xywhROI.roiWidth; + anchorBoxInfoTensor[idx].lt.y = 0.625 * roiTensorPtrSrc[i].xywhROI.roiHeight; + anchorBoxInfoTensor[idx].rb.x = 0.875 * roiTensorPtrSrc[i].xywhROI.roiWidth; + anchorBoxInfoTensor[idx].rb.y = 0.875 * roiTensorPtrSrc[i].xywhROI.roiHeight; + + idx++; + anchorBoxInfoTensor[idx].lt.x = 0.75 * roiTensorPtrSrc[i].xywhROI.roiWidth; + anchorBoxInfoTensor[idx].lt.y = 0.125 * roiTensorPtrSrc[i].xywhROI.roiHeight; + anchorBoxInfoTensor[idx].rb.x = 0.875 * roiTensorPtrSrc[i].xywhROI.roiWidth; + anchorBoxInfoTensor[idx].rb.y = 0.5 * roiTensorPtrSrc[i].xywhROI.roiHeight; + + if(channels == 3) + { + int idx = boxesInEachImage * 3 * i; + colorBuffer[idx] = 0; + colorBuffer[idx + 1] = 0; + colorBuffer[idx + 2] = 240; + colorBuffer[idx + 3] = 0; + colorBuffer[idx + 4] = 240; + colorBuffer[idx + 5] = 0; + colorBuffer[idx + 6] = 240; + colorBuffer[idx + 7] = 0; + colorBuffer[idx + 8] = 0; + for (int j = 0; j < 9; j++) + { + if (!inputBitDepth) + colors8u[idx + j] = (Rpp8u)(colorBuffer[idx + j]); + else if (inputBitDepth == 1) + colors16f[idx + j] = (Rpp16f)(colorBuffer[idx + j] * ONE_OVER_255); + else if (inputBitDepth == 2) + colors32f[idx + j] = (Rpp32f)(colorBuffer[idx + j] * ONE_OVER_255); + else if (inputBitDepth == 5) + colors8s[idx + j] = (Rpp8s)(colorBuffer[idx + j] - 128); + } + } + else + { + int idx = boxesInEachImage * i; + colorBuffer[idx] = 240; + colorBuffer[idx + 1] = 120; + colorBuffer[idx + 2] = 60; + for (int j = 0; j < 3; j++) + { + if (!inputBitDepth) + colors8u[idx + j] = (Rpp8u)(colorBuffer[idx + j]); + else if (inputBitDepth == 1) + colors16f[idx + j] = (Rpp16f)(colorBuffer[idx + j] * ONE_OVER_255); + else if (inputBitDepth == 2) + colors32f[idx + j] = (Rpp32f)(colorBuffer[idx + j] * ONE_OVER_255); + else if (inputBitDepth == 5) + colors8s[idx + j] = (Rpp8s)(colorBuffer[idx + j] - 128); + } + } + } +}