diff --git a/docs/data/doxygenOutputs/effects_augmentations_erase_img150x150.png b/docs/data/doxygenOutputs/effects_augmentations_erase_img150x150.png
new file mode 100644
index 000000000..136eca9bb
Binary files /dev/null and b/docs/data/doxygenOutputs/effects_augmentations_erase_img150x150.png differ
diff --git a/include/rppt_tensor_effects_augmentations.h b/include/rppt_tensor_effects_augmentations.h
index 365c22545..708f318bf 100644
--- a/include/rppt_tensor_effects_augmentations.h
+++ b/include/rppt_tensor_effects_augmentations.h
@@ -509,6 +509,56 @@ RppStatus rppt_gaussian_noise_voxel_host(RppPtr_t srcPtr, RpptGenericDescPtr src
RppStatus rppt_gaussian_noise_voxel_gpu(RppPtr_t srcPtr, RpptGenericDescPtr srcDescPtr, RppPtr_t dstPtr, RpptGenericDescPtr dstDescPtr, Rpp32f *meanTensor, Rpp32f *stdDevTensor, Rpp32u seed, RpptROI3DPtr roiGenericPtrSrc, RpptRoi3DType roiType, rppHandle_t rppHandle);
#endif // GPU_SUPPORT
+/*! \brief Erase augmentation on HOST backend for a NCHW/NHWC layout tensor
+ * \details This function erases one or more user defined regions from an image, for a batch of RGB(3 channel) / greyscale(1 channel) images with an NHWC/NCHW tensor layout.
+ * srcPtr depth ranges - Rpp8u (0 to 255), Rpp16f (0 to 1), Rpp32f (0 to 1), Rpp8s (-128 to 127).
+ * dstPtr depth ranges - Will be same depth as srcPtr.
+ * \image html img150x150.png Sample Input
+ * \image html effects_augmentations_erase_img150x150.png Sample Output
+ * \param [in] srcPtr source tensor in HOST memory
+ * \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = 1/3)
+ * \param [out] dstPtr destination tensor in HOST memory
+ * \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = same as that of srcDescPtr)
+ * \param [in] anchorBoxInfoTensor anchorBoxInfo values of type RpptRoiLtrb for each erase-region inside each image in the batch. Restrictions -
+ - 0 <= anchorBoxInfo[i] < respective image width/height
+ - Erase-region anchor boxes on each image given by the user must not overlap
+ * \param [in] colorsTensor RGB values to use for each erase-region inside each image in the batch. (colors[i] will have range equivalent of srcPtr)
+ * \param [in] numBoxesTensor number of erase-regions per image, for each image in the batch. (numBoxesTensor[n] >= 0)
+ * \param [in] roiTensorSrc ROI data in HOST memory, for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y))
+ * \param [in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB)
+ * \param [in] rppHandle RPP HOST handle created with \ref rppCreateWithBatchSize()
+ * \return A \ref RppStatus enumeration.
+ * \retval RPP_SUCCESS Successful completion.
+ * \retval RPP_ERROR* Unsuccessful completion.
+ */
+RppStatus rppt_erase_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, RpptRoiLtrb *anchorBoxInfoTensor, RppPtr_t colorsTensor, Rpp32u *numBoxesTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, rppHandle_t rppHandle);
+
+#ifdef GPU_SUPPORT
+/*! \brief Erase augmentation on HIP backend for a NCHW/NHWC layout tensor
+ * \details This function erases one or more user defined regions from an image, for a batch of RGB(3 channel) / greyscale(1 channel) images with an NHWC/NCHW tensor layout.
+ * srcPtr depth ranges - Rpp8u (0 to 255), Rpp16f (0 to 1), Rpp32f (0 to 1), Rpp8s (-128 to 127).
+ * dstPtr depth ranges - Will be same depth as srcPtr.
+ * \image html img150x150.jpg Sample Input
+ * \image html effects_augmentations_erase_img150x150.jpg Sample Output
+ * \param [in] srcPtr source tensor in HIP memory
+ * \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = 1/3)
+ * \param [out] dstPtr destination tensor in HIP memory
+ * \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = same as that of srcDescPtr)
+ * \param [in] anchorBoxInfoTensor anchorBoxInfo values of type RpptRoiLtrb for each erase-region inside each image in the batch. Restrictions -
+ - 0 <= anchorBoxInfo[i] < respective image width/height
+ - Erase-region anchor boxes on each image given by the user must not overlap
+ * \param [in] colorsTensor RGB values to use for each erase-region inside each image in the batch. (colors[i] will have range equivalent of srcPtr)
+ * \param [in] numBoxesTensor number of erase-regions per image, for each image in the batch. (numBoxesTensor[n] >= 0)
+ * \param [in] roiTensorSrc ROI data in HIP memory, for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y))
+ * \param [in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB)
+ * \param [in] rppHandle RPP HIP handle created with \ref rppCreateWithStreamAndBatchSize()
+ * \return A \ref RppStatus enumeration.
+ * \retval RPP_SUCCESS Successful completion.
+ * \retval RPP_ERROR* Unsuccessful completion.
+ */
+RppStatus rppt_erase_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, RpptRoiLtrb *anchorBoxInfoTensor, RppPtr_t colorsTensor, Rpp32u *numBoxesTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, rppHandle_t rppHandle);
+#endif // GPU_SUPPORT
+
/*! @}
*/
diff --git a/src/include/hip/rpp_hip_common.hpp b/src/include/hip/rpp_hip_common.hpp
index 169ba8bd5..a5cd5972c 100644
--- a/src/include/hip/rpp_hip_common.hpp
+++ b/src/include/hip/rpp_hip_common.hpp
@@ -82,6 +82,7 @@ typedef union { uchar uc1[8]; uchar4 uc4[2];
typedef union { uchar uc1[24]; uchar4 uc4[6]; uchar3 uc3[8]; d_uchar8 uc8[3]; } d_uchar24;
// schar
+typedef struct { schar sc1[3]; } d_schar3_s;
typedef struct { schar sc1[8]; } d_schar8_s;
typedef struct { d_schar8_s sc8[3]; } d_schar24_s;
diff --git a/src/modules/cpu/host_tensor_effects_augmentations.hpp b/src/modules/cpu/host_tensor_effects_augmentations.hpp
index 8d50fa213..9388ed6bd 100644
--- a/src/modules/cpu/host_tensor_effects_augmentations.hpp
+++ b/src/modules/cpu/host_tensor_effects_augmentations.hpp
@@ -34,5 +34,6 @@ SOFTWARE.
#include "kernel/water.hpp"
#include "kernel/ricap.hpp"
#include "kernel/vignette.hpp"
+#include "kernel/erase.hpp"
#endif // HOST_TENSOR_EFFECTS_AUGMENTATIONS_HPP
diff --git a/src/modules/cpu/kernel/erase.hpp b/src/modules/cpu/kernel/erase.hpp
new file mode 100644
index 000000000..487b0b56d
--- /dev/null
+++ b/src/modules/cpu/kernel/erase.hpp
@@ -0,0 +1,332 @@
+/*
+MIT License
+
+Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+*/
+
+#include "rppdefs.h"
+#include "rpp_cpu_simd.hpp"
+#include "rpp_cpu_common.hpp"
+
+template
+RppStatus erase_host_tensor(T *srcPtr,
+ RpptDescPtr srcDescPtr,
+ T *dstPtr,
+ RpptDescPtr dstDescPtr,
+ RpptRoiLtrb *anchorBoxInfoTensor,
+ T *colorsTensor,
+ Rpp32u *numBoxesTensor,
+ RpptROIPtr roiTensorPtrSrc,
+ RpptRoiType roiType,
+ RppLayoutParams layoutParams,
+ rpp::Handle& handle)
+{
+ RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};
+ Rpp32u numThreads = handle.GetNumThreads();
+
+ omp_set_dynamic(0);
+#pragma omp parallel for num_threads(numThreads)
+ for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
+ {
+ RpptROI roi;
+ RpptROIPtr roiPtrInput = &roiTensorPtrSrc[batchCount];
+ compute_roi_validation_host(roiPtrInput, &roi, &roiDefault, roiType);
+
+ Rpp32u numBoxes = numBoxesTensor[batchCount];
+ RpptRoiLtrb *anchorBoxInfo = anchorBoxInfoTensor + batchCount * numBoxes;
+ T *colors = colorsTensor + batchCount * numBoxes * srcDescPtr->c;
+
+ T *srcPtrImage, *dstPtrImage;
+ srcPtrImage = srcPtr + batchCount * srcDescPtr->strides.nStride;
+ dstPtrImage = dstPtr + batchCount * dstDescPtr->strides.nStride;
+
+ T *srcPtrChannel, *dstPtrChannel;
+ srcPtrChannel = srcPtrImage + (roi.xywhROI.xy.y * srcDescPtr->strides.hStride) + (roi.xywhROI.xy.x * layoutParams.bufferMultiplier);
+ dstPtrChannel = dstPtrImage;
+ T userPixel3[3];
+ Rpp32u bufferLength = roi.xywhROI.roiWidth * layoutParams.bufferMultiplier * sizeof(T);
+
+ // Erase with fused output-layout toggle (NHWC -> NCHW)
+ if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NCHW))
+ {
+ T *srcPtrRow, *dstPtrRowR, *dstPtrRowG, *dstPtrRowB;
+ srcPtrRow = srcPtrChannel;
+ dstPtrRowR = dstPtrChannel;
+ dstPtrRowG = dstPtrRowR + dstDescPtr->strides.cStride;
+ dstPtrRowB = dstPtrRowG + dstDescPtr->strides.cStride;
+
+ for(int i = 0; i < roi.xywhROI.roiHeight; i++)
+ {
+ T *srcPtrTemp, *dstPtrTempR, *dstPtrTempG, *dstPtrTempB;
+ srcPtrTemp = srcPtrRow;
+ dstPtrTempR = dstPtrRowR;
+ dstPtrTempG = dstPtrRowG;
+ dstPtrTempB = dstPtrRowB;
+ bool isErase = false;
+ Rpp32u bufferLength = 0;
+ T userPixelR, userPixelG, userPixelB;
+
+ for (int j = 0; j < roi.xywhROI.roiWidth;)
+ {
+ for(int count = 0; count < numBoxes; count++)
+ {
+ Rpp32u x1 = static_cast(RPPPRANGECHECK(anchorBoxInfo[count].lt.x, roi.xywhROI.xy.x, roi.xywhROI.roiWidth));
+ Rpp32u y1 = static_cast(RPPPRANGECHECK(anchorBoxInfo[count].lt.y, roi.xywhROI.xy.y, roi.xywhROI.roiHeight));
+ Rpp32u x2 = static_cast(RPPPRANGECHECK(anchorBoxInfo[count].rb.x, x1, roi.xywhROI.roiWidth));
+ Rpp32u y2 = static_cast(RPPPRANGECHECK(anchorBoxInfo[count].rb.y, y1, roi.xywhROI.roiHeight));
+ Rpp32u countMul3 = count * 3;
+ userPixelR = colors[countMul3];
+ userPixelG = colors[countMul3 + 1];
+ userPixelB = colors[countMul3 + 2];
+ if(i >= y1 && i <= y2 && j >= x1 && j <= x2)
+ {
+ isErase = true;
+ bufferLength = x2 - x1 + 1;
+ break;
+ }
+ }
+ if(isErase && bufferLength)
+ {
+ std::fill_n(dstPtrTempR, bufferLength, userPixelR);
+ std::fill_n(dstPtrTempG, bufferLength, userPixelG);
+ std::fill_n(dstPtrTempB, bufferLength, userPixelB);
+ srcPtrTemp += 3 * bufferLength;
+ j += bufferLength;
+ dstPtrTempR += bufferLength;
+ dstPtrTempG += bufferLength;
+ dstPtrTempB += bufferLength;
+ isErase = false;
+ }
+ else
+ {
+ *dstPtrTempR++ = srcPtrTemp[0];
+ *dstPtrTempG++ = srcPtrTemp[1];
+ *dstPtrTempB++ = srcPtrTemp[2];
+ srcPtrTemp += 3;
+ j++;
+ }
+ }
+
+ srcPtrRow += srcDescPtr->strides.hStride;
+ dstPtrRowR += dstDescPtr->strides.hStride;
+ dstPtrRowG += dstDescPtr->strides.hStride;
+ dstPtrRowB += dstDescPtr->strides.hStride;
+ }
+ }
+
+ // Erase with fused output-layout toggle (NCHW -> NHWC)
+ else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NHWC))
+ {
+ T *srcPtrRowR, *srcPtrRowG, *srcPtrRowB, *dstPtrRow;
+ srcPtrRowR = srcPtrChannel;
+ srcPtrRowG = srcPtrRowR + srcDescPtr->strides.cStride;
+ srcPtrRowB = srcPtrRowG + srcDescPtr->strides.cStride;
+ dstPtrRow = dstPtrChannel;
+
+ for(int i = 0; i < roi.xywhROI.roiHeight; i++)
+ {
+ T *srcPtrTempR, *srcPtrTempG, *srcPtrTempB, *dstPtrTemp;
+ srcPtrTempR = srcPtrRowR;
+ srcPtrTempG = srcPtrRowG;
+ srcPtrTempB = srcPtrRowB;
+ dstPtrTemp = dstPtrRow;
+ bool isErase = false;
+ Rpp32u bufferLengthPerChannel = 0;
+
+ for (int j = 0; j < roi.xywhROI.roiWidth;)
+ {
+ for(int count = 0; count < numBoxes; count++)
+ {
+ Rpp32u x1 = static_cast(RPPPRANGECHECK(anchorBoxInfo[count].lt.x, roi.xywhROI.xy.x, roi.xywhROI.roiWidth));
+ Rpp32u y1 = static_cast(RPPPRANGECHECK(anchorBoxInfo[count].lt.y, roi.xywhROI.xy.y, roi.xywhROI.roiHeight));
+ Rpp32u x2 = static_cast(RPPPRANGECHECK(anchorBoxInfo[count].rb.x, x1, roi.xywhROI.roiWidth));
+ Rpp32u y2 = static_cast(RPPPRANGECHECK(anchorBoxInfo[count].rb.y, y1, roi.xywhROI.roiHeight));
+ Rpp32u countMul3 = count * 3;
+ userPixel3[0] = colors[countMul3];
+ userPixel3[1] = colors[countMul3 + 1];
+ userPixel3[2] = colors[countMul3 + 2];
+
+ if(i >= y1 && i <= y2 && j >= x1 && j <= x2)
+ {
+ isErase = true;
+ bufferLengthPerChannel = x2 - x1 + 1;
+ break;
+ }
+ }
+ if(isErase && bufferLengthPerChannel)
+ {
+ for (int k = 0; k < bufferLengthPerChannel; k++)
+ {
+ memcpy(dstPtrTemp, userPixel3, sizeof(T) * 3);
+ dstPtrTemp += 3;
+ }
+ j += bufferLengthPerChannel;
+ srcPtrTempR += bufferLengthPerChannel;
+ srcPtrTempG += bufferLengthPerChannel;
+ srcPtrTempB += bufferLengthPerChannel;
+ isErase = false;
+ }
+ else
+ {
+ dstPtrTemp[0] = *srcPtrTempR++;
+ dstPtrTemp[1] = *srcPtrTempG++;
+ dstPtrTemp[2] = *srcPtrTempB++;
+ dstPtrTemp += 3;
+ j++;
+ }
+ }
+
+ srcPtrRowR += srcDescPtr->strides.hStride;
+ srcPtrRowG += srcDescPtr->strides.hStride;
+ srcPtrRowB += srcDescPtr->strides.hStride;
+ dstPtrRow += dstDescPtr->strides.hStride;
+ }
+ }
+
+ // Erase without fused output-layout toggle 3 channel(NCHW -> NCHW)
+ else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NCHW))
+ {
+ // To copy ROI region in Image
+ for(int c = 0; c < layoutParams.channelParam; c++)
+ {
+ T *srcPtrRow, *dstPtrRow;
+ srcPtrRow = srcPtrChannel;
+ dstPtrRow = dstPtrChannel;
+
+ for(int i = 0; i < roi.xywhROI.roiHeight; i++)
+ {
+ memcpy(dstPtrRow, srcPtrRow, bufferLength);
+ srcPtrRow += srcDescPtr->strides.hStride;
+ dstPtrRow += dstDescPtr->strides.hStride;
+ }
+
+ srcPtrChannel += srcDescPtr->strides.cStride;
+ dstPtrChannel += dstDescPtr->strides.cStride;
+ }
+
+ for(int count = 0; count < numBoxes; count++)
+ {
+ Rpp32u x1 = static_cast(RPPPRANGECHECK(anchorBoxInfo[count].lt.x, roi.xywhROI.xy.x, roi.xywhROI.roiWidth));
+ Rpp32u y1 = static_cast(RPPPRANGECHECK(anchorBoxInfo[count].lt.y, roi.xywhROI.xy.y, roi.xywhROI.roiHeight));
+ Rpp32u x2 = static_cast(RPPPRANGECHECK(anchorBoxInfo[count].rb.x, x1, roi.xywhROI.roiWidth));
+ Rpp32u y2 = static_cast(RPPPRANGECHECK(anchorBoxInfo[count].rb.y, y1, roi.xywhROI.roiHeight));
+
+ Rpp32u pixelLocation = (y1 * srcDescPtr->strides.hStride) + (x1 * srcDescPtr->strides.wStride);
+ Rpp32u boxHeight = y2 - y1 + 1;
+ Rpp32u boxWidth = x2 - x1 + 1;
+
+ T *dstPtrTempR, *dstPtrTempG, *dstPtrTempB;
+ dstPtrTempR = dstPtrImage + pixelLocation;
+ dstPtrTempG = dstPtrTempR + dstDescPtr->strides.cStride;
+ dstPtrTempB = dstPtrTempG + dstDescPtr->strides.cStride;
+ Rpp32u countMul3 = count * 3;
+ T userPixelR = colors[countMul3];
+ T userPixelG = colors[countMul3 + 1];
+ T userPixelB = colors[countMul3 + 2];
+ for (int i = 0; i < boxHeight; i++)
+ {
+ std::fill_n(dstPtrTempR, boxWidth, userPixelR);
+ std::fill_n(dstPtrTempG, boxWidth, userPixelG);
+ std::fill_n(dstPtrTempB, boxWidth, userPixelB);
+ dstPtrTempR += dstDescPtr->strides.hStride;
+ dstPtrTempG += dstDescPtr->strides.hStride;
+ dstPtrTempB += dstDescPtr->strides.hStride;
+ }
+ }
+ }
+ // Erase without fused output-layout toggle 1 channel(NCHW -> NCHW)
+ else if ((srcDescPtr->c == 1) && (srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NCHW))
+ {
+ // To copy ROI region in Image
+ for(int i = 0; i < roi.xywhROI.roiHeight; i++)
+ {
+ memcpy(dstPtrChannel, srcPtrChannel, bufferLength);
+ srcPtrChannel += srcDescPtr->strides.hStride;
+ dstPtrChannel += dstDescPtr->strides.hStride;
+ }
+
+ for (int count = 0; count < numBoxes; count++)
+ {
+ Rpp32u x1 = (Rpp32u)RPPPRANGECHECK(anchorBoxInfo[count].lt.x, roi.xywhROI.xy.x, roi.xywhROI.roiWidth);
+ Rpp32u y1 = (Rpp32u)RPPPRANGECHECK(anchorBoxInfo[count].lt.y, roi.xywhROI.xy.y, roi.xywhROI.roiHeight);
+ Rpp32u x2 = (Rpp32u)RPPPRANGECHECK(anchorBoxInfo[count].rb.x, x1, roi.xywhROI.roiWidth);
+ Rpp32u y2 = (Rpp32u)RPPPRANGECHECK(anchorBoxInfo[count].rb.y, y1, roi.xywhROI.roiHeight);
+
+ Rpp32u pixelLocation = (y1 * srcDescPtr->strides.hStride) + (x1 * srcDescPtr->strides.wStride);
+ Rpp32u boxHeight = y2 - y1 + 1;
+ Rpp32u boxWidth = x2 - x1 + 1;
+
+ T *dstPtrTemp;
+ dstPtrTemp = dstPtrImage + pixelLocation;
+
+ T userPixel = colors[count];
+ for (int i = 0; i < boxHeight; i++)
+ {
+ std::fill_n(dstPtrTemp, boxWidth, userPixel);
+ dstPtrTemp += dstDescPtr->strides.hStride;
+ }
+ }
+ }
+
+ // Erase without fused output-layout toggle 3 channel(NHWC -> NHWC)
+ else
+ {
+ // To copy ROI region in Image
+ for(int i = 0; i < roi.xywhROI.roiHeight; i++)
+ {
+ memcpy(dstPtrChannel, srcPtrChannel, bufferLength);
+ srcPtrChannel += srcDescPtr->strides.hStride;
+ dstPtrChannel += dstDescPtr->strides.hStride;
+ }
+
+ for(int count = 0; count < numBoxes; count++)
+ {
+ Rpp32u x1 = static_cast(RPPPRANGECHECK(anchorBoxInfo[count].lt.x, roi.xywhROI.xy.x, roi.xywhROI.roiWidth));
+ Rpp32u y1 = static_cast(RPPPRANGECHECK(anchorBoxInfo[count].lt.y, roi.xywhROI.xy.y, roi.xywhROI.roiHeight));
+ Rpp32u x2 = static_cast(RPPPRANGECHECK(anchorBoxInfo[count].rb.x, x1, roi.xywhROI.roiWidth));
+ Rpp32u y2 = static_cast(RPPPRANGECHECK(anchorBoxInfo[count].rb.y, y1, roi.xywhROI.roiHeight));
+ Rpp32u countMul3 = count * 3;
+ userPixel3[0] = colors[countMul3];
+ userPixel3[1] = colors[countMul3 + 1];
+ userPixel3[2] = colors[countMul3 + 2];
+
+ Rpp32u pixelLocation = (y1 * srcDescPtr->strides.hStride) + (x1 * srcDescPtr->strides.wStride);
+ Rpp32u boxHeight = y2 - y1 + 1;
+ Rpp32u boxWidth = x2 - x1 + 1;
+ T *dstPtrTemp;
+ dstPtrTemp = dstPtrImage + pixelLocation;
+
+ for (int i = 0; i < boxHeight; i++)
+ {
+ T *dstPtrRow = dstPtrTemp;
+ for (int j = 0; j < boxWidth; j++)
+ {
+ memcpy(dstPtrRow, userPixel3, sizeof(T) * 3);
+ dstPtrRow += 3;
+ }
+ dstPtrTemp += dstDescPtr->strides.hStride;
+ }
+ }
+ }
+ }
+
+ return RPP_SUCCESS;
+}
diff --git a/src/modules/hip/hip_tensor_effects_augmentations.hpp b/src/modules/hip/hip_tensor_effects_augmentations.hpp
index 25cd9a863..abdfd30ab 100644
--- a/src/modules/hip/hip_tensor_effects_augmentations.hpp
+++ b/src/modules/hip/hip_tensor_effects_augmentations.hpp
@@ -34,5 +34,6 @@ SOFTWARE.
#include "kernel/water.hpp"
#include "kernel/ricap.hpp"
#include "kernel/vignette.hpp"
+#include "kernel/erase.hpp"
#endif // HIP_TENSOR_EFFECTS_AUGMENTATIONS_HPP
diff --git a/src/modules/hip/kernel/erase.hpp b/src/modules/hip/kernel/erase.hpp
new file mode 100644
index 000000000..2591b53f0
--- /dev/null
+++ b/src/modules/hip/kernel/erase.hpp
@@ -0,0 +1,278 @@
+#include
+#include "rpp_hip_common.hpp"
+
+// -------------------- Set 0 - Erase main kernels --------------------
+template
+__global__ void erase_pkd_hip_tensor(T *dstPtr,
+ uint2 dstStridesNH,
+ RpptRoiLtrb *anchorBoxInfoTensor,
+ U *colorsTensor,
+ Rpp32u *numBoxesTensor,
+ RpptROIPtr roiTensorPtrSrc)
+{
+ int id_x = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
+ int id_y = hipBlockIdx_y * hipBlockDim_y + hipThreadIdx_y;
+ int id_z = hipBlockIdx_z * hipBlockDim_z + hipThreadIdx_z;
+
+ if ((id_y >= roiTensorPtrSrc[id_z].xywhROI.roiHeight) || (id_x >= roiTensorPtrSrc[id_z].xywhROI.roiWidth))
+ return;
+
+ Rpp32u numBoxes = numBoxesTensor[id_z];
+ uint dstIdx = (id_z * dstStridesNH.x) + (id_y * dstStridesNH.y) + id_x * 3;
+
+ // check if the co-ordinates is within any user defined box
+ for (int i = 0; i < numBoxes; i++)
+ {
+ int temp = (id_z * numBoxes) + i;
+ if (id_x >= anchorBoxInfoTensor[temp].lt.x && id_x <= anchorBoxInfoTensor[temp].rb.x && id_y >= anchorBoxInfoTensor[temp].lt.y && id_y <= anchorBoxInfoTensor[temp].rb.y)
+ {
+ *reinterpret_cast(dstPtr + dstIdx) = static_cast(colorsTensor[temp]);
+ break;
+ }
+ }
+}
+
+template
+__global__ void erase_pln_hip_tensor(T *dstPtr,
+ uint3 dstStridesNCH,
+ RpptRoiLtrb *anchorBoxInfoTensor,
+ T *colorsTensor,
+ Rpp32u *numBoxesTensor,
+ RpptROIPtr roiTensorPtrSrc)
+{
+ int id_x = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
+ int id_y = hipBlockIdx_y * hipBlockDim_y + hipThreadIdx_y;
+ int id_z = hipBlockIdx_z * hipBlockDim_z + hipThreadIdx_z;
+
+ if ((id_y >= roiTensorPtrSrc[id_z].xywhROI.roiHeight) || (id_x >= roiTensorPtrSrc[id_z].xywhROI.roiWidth))
+ return;
+
+ Rpp32u numBoxes = numBoxesTensor[id_z];
+ uint dstIdx = (id_z * dstStridesNCH.x) + (id_y * dstStridesNCH.z) + id_x;
+
+ // check if the co-ordinates is within any user defined box
+ for (int i = 0; i < numBoxes; i++)
+ {
+ int temp = (id_z * numBoxes) + i;
+ if (id_x >= anchorBoxInfoTensor[temp].lt.x && id_x <= anchorBoxInfoTensor[temp].rb.x && id_y >= anchorBoxInfoTensor[temp].lt.y && id_y <= anchorBoxInfoTensor[temp].rb.y)
+ {
+ *static_cast((dstPtr + dstIdx)) = colorsTensor[temp];
+ break;
+ }
+ }
+}
+
+template
+__global__ void erase_pln3_hip_tensor(T *dstPtr,
+ uint3 dstStridesNCH,
+ RpptRoiLtrb *anchorBoxInfoTensor,
+ T *colorsTensor,
+ Rpp32u *numBoxesTensor,
+ RpptROIPtr roiTensorPtrSrc)
+{
+ int id_x = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
+ int id_y = hipBlockIdx_y * hipBlockDim_y + hipThreadIdx_y;
+ int id_z = hipBlockIdx_z * hipBlockDim_z + hipThreadIdx_z;
+
+ if ((id_y >= roiTensorPtrSrc[id_z].xywhROI.roiHeight) || (id_x >= roiTensorPtrSrc[id_z].xywhROI.roiWidth))
+ return;
+
+ Rpp32u numBoxes = numBoxesTensor[id_z];
+ uint dstIdx = (id_z * dstStridesNCH.x) + (id_y * dstStridesNCH.z) + id_x;
+
+ // check if the co-ordinates is within any user defined box
+ for (int i = 0; i < numBoxes; i++)
+ {
+ int temp = (id_z * numBoxes) + i;
+ if (id_x >= anchorBoxInfoTensor[temp].lt.x && id_x <= anchorBoxInfoTensor[temp].rb.x && id_y >= anchorBoxInfoTensor[temp].lt.y && id_y <= anchorBoxInfoTensor[temp].rb.y)
+ {
+ temp *= 3;
+ *static_cast(dstPtr + dstIdx) = colorsTensor[temp];
+ dstIdx += dstStridesNCH.y;
+ *static_cast(dstPtr + dstIdx) = colorsTensor[temp + 1];
+ dstIdx += dstStridesNCH.y;
+ *static_cast(dstPtr + dstIdx) = colorsTensor[temp + 2];
+ break;
+ }
+ }
+}
+
+// -------------------- Set 1 - Kernel Executors --------------------
+template
+RppStatus hip_exec_erase_tensor(T *srcPtr,
+ RpptDescPtr srcDescPtr,
+ T *dstPtr,
+ RpptDescPtr dstDescPtr,
+ RpptRoiLtrb *anchorBoxInfoTensor,
+ U *colorsTensor,
+ Rpp32u *numBoxesTensor,
+ RpptROIPtr roiTensorPtrSrc,
+ RpptRoiType roiType,
+ rpp::Handle& handle)
+{
+ if (roiType == RpptRoiType::LTRB)
+ hip_exec_roi_converison_ltrb_to_xywh(roiTensorPtrSrc, handle);
+
+ int globalThreads_x = dstDescPtr->w;
+ int globalThreads_y = dstDescPtr->h;
+ int globalThreads_z = handle.GetBatchSize();
+
+ if ((srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NHWC))
+ {
+ if (srcDescPtr->dataType == RpptDataType::U8)
+ {
+ hipMemcpyAsync(dstPtr, srcPtr, static_cast(srcDescPtr->n * srcDescPtr->strides.nStride * sizeof(Rpp8u)), hipMemcpyDeviceToDevice, handle.GetStream());
+ hipStreamSynchronize(handle.GetStream());
+ hipLaunchKernelGGL(erase_pkd_hip_tensor,
+ dim3(ceil((float)globalThreads_x / LOCAL_THREADS_X), ceil((float)globalThreads_y / LOCAL_THREADS_Y), ceil((float)globalThreads_z / LOCAL_THREADS_Z)),
+ dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z),
+ 0,
+ handle.GetStream(),
+ dstPtr,
+ make_uint2(dstDescPtr->strides.nStride, dstDescPtr->strides.hStride),
+ anchorBoxInfoTensor,
+ reinterpret_cast(colorsTensor),
+ numBoxesTensor,
+ roiTensorPtrSrc);
+ }
+ else if (srcDescPtr->dataType == RpptDataType::F16)
+ {
+ hipMemcpyAsync(dstPtr, srcPtr, static_cast(srcDescPtr->n * srcDescPtr->strides.nStride * sizeof(Rpp16f)), hipMemcpyDeviceToDevice, handle.GetStream());
+ hipStreamSynchronize(handle.GetStream());
+ hipLaunchKernelGGL(erase_pkd_hip_tensor,
+ dim3(ceil((float)globalThreads_x / LOCAL_THREADS_X), ceil((float)globalThreads_y / LOCAL_THREADS_Y), ceil((float)globalThreads_z / LOCAL_THREADS_Z)),
+ dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z),
+ 0,
+ handle.GetStream(),
+ dstPtr,
+ make_uint2(dstDescPtr->strides.nStride, dstDescPtr->strides.hStride),
+ anchorBoxInfoTensor,
+ reinterpret_cast(colorsTensor),
+ numBoxesTensor,
+ roiTensorPtrSrc);
+ }
+ else if (srcDescPtr->dataType == RpptDataType::F32)
+ {
+ hipMemcpyAsync(dstPtr, srcPtr, static_cast(srcDescPtr->n * srcDescPtr->strides.nStride * sizeof(Rpp32f)), hipMemcpyDeviceToDevice, handle.GetStream());
+ hipStreamSynchronize(handle.GetStream());
+ hipLaunchKernelGGL(erase_pkd_hip_tensor,
+ dim3(ceil((float)globalThreads_x / LOCAL_THREADS_X), ceil((float)globalThreads_y / LOCAL_THREADS_Y), ceil((float)globalThreads_z / LOCAL_THREADS_Z)),
+ dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z),
+ 0,
+ handle.GetStream(),
+ dstPtr,
+ make_uint2(dstDescPtr->strides.nStride, dstDescPtr->strides.hStride),
+ anchorBoxInfoTensor,
+ reinterpret_cast(colorsTensor),
+ numBoxesTensor,
+ roiTensorPtrSrc);
+ }
+ else if (srcDescPtr->dataType == RpptDataType::I8)
+ {
+ hipMemcpyAsync(dstPtr, srcPtr, static_cast(srcDescPtr->n * srcDescPtr->strides.nStride * sizeof(Rpp8s)), hipMemcpyDeviceToDevice, handle.GetStream());
+ hipStreamSynchronize(handle.GetStream());
+ hipLaunchKernelGGL(erase_pkd_hip_tensor,
+ dim3(ceil((float)globalThreads_x / LOCAL_THREADS_X), ceil((float)globalThreads_y / LOCAL_THREADS_Y), ceil((float)globalThreads_z / LOCAL_THREADS_Z)),
+ dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z),
+ 0,
+ handle.GetStream(),
+ dstPtr,
+ make_uint2(dstDescPtr->strides.nStride, dstDescPtr->strides.hStride),
+ anchorBoxInfoTensor,
+ reinterpret_cast(colorsTensor),
+ numBoxesTensor,
+ roiTensorPtrSrc);
+ }
+ }
+ else if ((srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NCHW) && dstDescPtr->c == 1)
+ {
+ hipMemcpyAsync(dstPtr, srcPtr, static_cast(srcDescPtr->n * srcDescPtr->strides.nStride * sizeof(T)), hipMemcpyDeviceToDevice, handle.GetStream());
+ hipStreamSynchronize(handle.GetStream());
+ hipLaunchKernelGGL(erase_pln_hip_tensor,
+ dim3(ceil((float)globalThreads_x/LOCAL_THREADS_X), ceil((float)globalThreads_y/LOCAL_THREADS_Y), ceil((float)globalThreads_z/LOCAL_THREADS_Z)),
+ dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z),
+ 0,
+ handle.GetStream(),
+ dstPtr,
+ make_uint3(dstDescPtr->strides.nStride, dstDescPtr->strides.cStride, dstDescPtr->strides.hStride),
+ anchorBoxInfoTensor,
+ colorsTensor,
+ numBoxesTensor,
+ roiTensorPtrSrc);
+ }
+ else if ((srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NCHW) && dstDescPtr->c == 3)
+ {
+ hipMemcpyAsync(dstPtr, srcPtr, static_cast(srcDescPtr->n * srcDescPtr->strides.nStride * sizeof(T)), hipMemcpyDeviceToDevice, handle.GetStream());
+ hipStreamSynchronize(handle.GetStream());
+ hipLaunchKernelGGL(erase_pln3_hip_tensor,
+ dim3(ceil((float)globalThreads_x/LOCAL_THREADS_X), ceil((float)globalThreads_y/LOCAL_THREADS_Y), ceil((float)globalThreads_z/LOCAL_THREADS_Z)),
+ dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z),
+ 0,
+ handle.GetStream(),
+ dstPtr,
+ make_uint3(dstDescPtr->strides.nStride, dstDescPtr->strides.cStride, dstDescPtr->strides.hStride),
+ anchorBoxInfoTensor,
+ colorsTensor,
+ numBoxesTensor,
+ roiTensorPtrSrc);
+ }
+ else if ((srcDescPtr->c == 3) && (dstDescPtr->c == 3))
+ {
+ if ((srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NCHW))
+ {
+ globalThreads_x = (dstDescPtr->w + 7) >> 3;
+ hipLaunchKernelGGL(convert_pkd3_pln3_hip_tensor,
+ dim3(ceil((float)globalThreads_x/LOCAL_THREADS_X), ceil((float)globalThreads_y/LOCAL_THREADS_Y), ceil((float)globalThreads_z/LOCAL_THREADS_Z)),
+ dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z),
+ 0,
+ handle.GetStream(),
+ srcPtr,
+ make_uint2(srcDescPtr->strides.nStride, srcDescPtr->strides.hStride),
+ dstPtr,
+ make_uint3(dstDescPtr->strides.nStride, dstDescPtr->strides.cStride, dstDescPtr->strides.hStride),
+ roiTensorPtrSrc);
+ hipStreamSynchronize(handle.GetStream());
+ globalThreads_x = dstDescPtr->w;
+ hipLaunchKernelGGL(erase_pln3_hip_tensor,
+ dim3(ceil((float)globalThreads_x/LOCAL_THREADS_X), ceil((float)globalThreads_y/LOCAL_THREADS_Y), ceil((float)globalThreads_z/LOCAL_THREADS_Z)),
+ dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z),
+ 0,
+ handle.GetStream(),
+ dstPtr,
+ make_uint3(dstDescPtr->strides.nStride, dstDescPtr->strides.cStride, dstDescPtr->strides.hStride),
+ anchorBoxInfoTensor,
+ colorsTensor,
+ numBoxesTensor,
+ roiTensorPtrSrc);
+ }
+ else if ((srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NHWC))
+ {
+ globalThreads_x = (dstDescPtr->w + 7) >> 3;
+ hipLaunchKernelGGL(convert_pln3_pkd3_hip_tensor,
+ dim3(ceil((float)globalThreads_x/LOCAL_THREADS_X), ceil((float)globalThreads_y/LOCAL_THREADS_Y), ceil((float)globalThreads_z/LOCAL_THREADS_Z)),
+ dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z),
+ 0,
+ handle.GetStream(),
+ srcPtr,
+ make_uint3(srcDescPtr->strides.nStride, srcDescPtr->strides.cStride, srcDescPtr->strides.hStride),
+ dstPtr,
+ make_uint2(dstDescPtr->strides.nStride, dstDescPtr->strides.hStride),
+ roiTensorPtrSrc);
+ hipStreamSynchronize(handle.GetStream());
+ globalThreads_x = dstDescPtr->w;
+ hipLaunchKernelGGL(erase_pkd_hip_tensor,
+ dim3(ceil((float)globalThreads_x/LOCAL_THREADS_X), ceil((float)globalThreads_y/LOCAL_THREADS_Y), ceil((float)globalThreads_z/LOCAL_THREADS_Z)),
+ dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z),
+ 0,
+ handle.GetStream(),
+ dstPtr,
+ make_uint2(dstDescPtr->strides.nStride, dstDescPtr->strides.hStride),
+ anchorBoxInfoTensor,
+ colorsTensor,
+ numBoxesTensor,
+ roiTensorPtrSrc);
+ }
+ }
+
+ return RPP_SUCCESS;
+}
diff --git a/src/modules/rppt_tensor_effects_augmentations.cpp b/src/modules/rppt_tensor_effects_augmentations.cpp
index 24cc8e032..8829a4ee0 100644
--- a/src/modules/rppt_tensor_effects_augmentations.cpp
+++ b/src/modules/rppt_tensor_effects_augmentations.cpp
@@ -721,6 +721,80 @@ RppStatus rppt_vignette_host(RppPtr_t srcPtr,
}
+/******************** erase ********************/
+
+RppStatus rppt_erase_host(RppPtr_t srcPtr,
+ RpptDescPtr srcDescPtr,
+ RppPtr_t dstPtr,
+ RpptDescPtr dstDescPtr,
+ RpptRoiLtrb *anchorBoxInfoTensor,
+ RppPtr_t colorsTensor,
+ Rpp32u *numBoxesTensor,
+ RpptROIPtr roiTensorPtrSrc,
+ RpptRoiType roiType,
+ rppHandle_t rppHandle)
+{
+ RppLayoutParams layoutParams = get_layout_params(srcDescPtr->layout, srcDescPtr->c);
+ if ((srcDescPtr->dataType == RpptDataType::U8) && (dstDescPtr->dataType == RpptDataType::U8))
+ {
+ erase_host_tensor(static_cast(srcPtr) + srcDescPtr->offsetInBytes,
+ srcDescPtr,
+ static_cast(dstPtr) + dstDescPtr->offsetInBytes,
+ dstDescPtr,
+ anchorBoxInfoTensor,
+ static_cast(colorsTensor),
+ numBoxesTensor,
+ roiTensorPtrSrc,
+ roiType,
+ layoutParams,
+ rpp::deref(rppHandle));
+ }
+ else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16))
+ {
+ erase_host_tensor(reinterpret_cast(static_cast(srcPtr) + srcDescPtr->offsetInBytes),
+ srcDescPtr,
+ reinterpret_cast(static_cast(dstPtr) + srcDescPtr->offsetInBytes),
+ dstDescPtr,
+ anchorBoxInfoTensor,
+ static_cast(colorsTensor),
+ numBoxesTensor,
+ roiTensorPtrSrc,
+ roiType,
+ layoutParams,
+ rpp::deref(rppHandle));
+ }
+ else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32))
+ {
+ erase_host_tensor(reinterpret_cast(static_cast(srcPtr) + srcDescPtr->offsetInBytes),
+ srcDescPtr,
+ reinterpret_cast(static_cast(dstPtr) + dstDescPtr->offsetInBytes),
+ dstDescPtr,
+ anchorBoxInfoTensor,
+ static_cast(colorsTensor),
+ numBoxesTensor,
+ roiTensorPtrSrc,
+ roiType,
+ layoutParams,
+ rpp::deref(rppHandle));
+ }
+ else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8))
+ {
+ erase_host_tensor(static_cast(srcPtr) + srcDescPtr->offsetInBytes,
+ srcDescPtr,
+ static_cast(dstPtr) + dstDescPtr->offsetInBytes,
+ dstDescPtr,
+ anchorBoxInfoTensor,
+ static_cast(colorsTensor),
+ numBoxesTensor,
+ roiTensorPtrSrc,
+ roiType,
+ layoutParams,
+ rpp::deref(rppHandle));
+ }
+
+ return RPP_SUCCESS;
+}
+
/******************** ricap ********************/
RppStatus rppt_ricap_host(RppPtr_t srcPtr,
@@ -1575,4 +1649,76 @@ RppStatus rppt_vignette_gpu(RppPtr_t srcPtr,
#endif // backend
}
+RppStatus rppt_erase_gpu(RppPtr_t srcPtr,
+ RpptDescPtr srcDescPtr,
+ RppPtr_t dstPtr,
+ RpptDescPtr dstDescPtr,
+ RpptRoiLtrb *anchorBoxInfoTensor,
+ RppPtr_t colorsTensor,
+ Rpp32u *numBoxesTensor,
+ RpptROIPtr roiTensorPtrSrc,
+ RpptRoiType roiType,
+ rppHandle_t rppHandle)
+{
+#ifdef HIP_COMPILE
+
+ if ((srcDescPtr->dataType == RpptDataType::U8) && (dstDescPtr->dataType == RpptDataType::U8))
+ {
+ hip_exec_erase_tensor(static_cast(srcPtr) + srcDescPtr->offsetInBytes,
+ srcDescPtr,
+ static_cast(dstPtr) + dstDescPtr->offsetInBytes,
+ dstDescPtr,
+ anchorBoxInfoTensor,
+ static_cast(colorsTensor),
+ numBoxesTensor,
+ roiTensorPtrSrc,
+ roiType,
+ rpp::deref(rppHandle));
+ }
+ else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16))
+ {
+ hip_exec_erase_tensor(reinterpret_cast(static_cast(srcPtr) + srcDescPtr->offsetInBytes),
+ srcDescPtr,
+ reinterpret_cast(static_cast(dstPtr) + dstDescPtr->offsetInBytes),
+ dstDescPtr,
+ anchorBoxInfoTensor,
+ static_cast(colorsTensor),
+ numBoxesTensor,
+ roiTensorPtrSrc,
+ roiType,
+ rpp::deref(rppHandle));
+ }
+ else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32))
+ {
+ hip_exec_erase_tensor(reinterpret_cast(static_cast(srcPtr) + srcDescPtr->offsetInBytes),
+ srcDescPtr,
+ reinterpret_cast(static_cast(dstPtr) + dstDescPtr->offsetInBytes),
+ dstDescPtr,
+ anchorBoxInfoTensor,
+ static_cast(colorsTensor),
+ numBoxesTensor,
+ roiTensorPtrSrc,
+ roiType,
+ rpp::deref(rppHandle));
+ }
+ else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8))
+ {
+ hip_exec_erase_tensor(static_cast(srcPtr) + srcDescPtr->offsetInBytes,
+ srcDescPtr,
+ static_cast(dstPtr) + dstDescPtr->offsetInBytes,
+ dstDescPtr,
+ anchorBoxInfoTensor,
+ static_cast(colorsTensor),
+ numBoxesTensor,
+ roiTensorPtrSrc,
+ roiType,
+ rpp::deref(rppHandle));
+ }
+
+ return RPP_SUCCESS;
+#elif defined(OCL_COMPILE)
+ return RPP_ERROR_NOT_IMPLEMENTED;
+#endif // backend
+}
+
#endif // GPU_SUPPORT
diff --git a/utilities/test_suite/HIP/Tensor_hip.cpp b/utilities/test_suite/HIP/Tensor_hip.cpp
index bb357c030..604231e3c 100644
--- a/utilities/test_suite/HIP/Tensor_hip.cpp
+++ b/utilities/test_suite/HIP/Tensor_hip.cpp
@@ -356,6 +356,18 @@ int main(int argc, char **argv)
if(testCase == 82)
CHECK_RETURN_STATUS(hipHostMalloc(&roiPtrInputCropRegion, 4 * sizeof(RpptROI)));
+ Rpp32u boxesInEachImage = 3;
+ Rpp32f *colorBuffer;
+ RpptRoiLtrb *anchorBoxInfoTensor;
+ Rpp32u *numOfBoxes;
+ if(testCase == 32)
+ {
+ CHECK_RETURN_STATUS(hipHostMalloc(&colorBuffer, batchSize * boxesInEachImage * sizeof(Rpp32f)));
+ CHECK_RETURN_STATUS(hipMemset(colorBuffer, 0, batchSize * boxesInEachImage * sizeof(Rpp32f)));
+ CHECK_RETURN_STATUS(hipHostMalloc(&anchorBoxInfoTensor, batchSize * boxesInEachImage * sizeof(RpptRoiLtrb)));
+ CHECK_RETURN_STATUS(hipHostMalloc(&numOfBoxes, batchSize * sizeof(Rpp32u)));
+ }
+
// create cropRoi and patchRoi in case of crop_and_patch
RpptROI *cropRoi, *patchRoi;
if(testCase == 33)
@@ -736,6 +748,19 @@ int main(int argc, char **argv)
break;
}
+ case 32:
+ {
+ testCaseName = "erase";
+
+ init_erase(batchSize, boxesInEachImage, numOfBoxes, anchorBoxInfoTensor, roiTensorPtrSrc, srcDescPtr->c, colorBuffer, inputBitDepth);
+ startWallTime = omp_get_wtime();
+ if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5)
+ rppt_erase_gpu(d_input, srcDescPtr, d_output, dstDescPtr, anchorBoxInfoTensor, colorBuffer, numOfBoxes, roiTensorPtrSrc, roiTypeSrc, handle);
+ else
+ missingFuncFlag = 1;
+
+ break;
+ }
case 33:
{
testCaseName = "crop_and_patch";
@@ -1433,6 +1458,12 @@ int main(int argc, char **argv)
}
if (reductionTypeCase)
CHECK_RETURN_STATUS(hipHostFree(reductionFuncResultArr));
+ if(testCase == 32)
+ {
+ CHECK_RETURN_STATUS(hipHostFree(colorBuffer));
+ CHECK_RETURN_STATUS(hipHostFree(anchorBoxInfoTensor));
+ CHECK_RETURN_STATUS(hipHostFree(numOfBoxes));
+ }
if(anchorTensor != NULL)
CHECK_RETURN_STATUS(hipHostFree(anchorTensor));
if(shapeTensor != NULL)
diff --git a/utilities/test_suite/HIP/runTests.py b/utilities/test_suite/HIP/runTests.py
index d32d20e2b..f62103091 100644
--- a/utilities/test_suite/HIP/runTests.py
+++ b/utilities/test_suite/HIP/runTests.py
@@ -296,7 +296,7 @@ def rpp_test_suite_parser_and_validator():
subprocess.run(["make", "-j16"], cwd=".") # nosec
# List of cases supported
-supportedCaseList = ['0', '1', '2', '4', '8', '13', '20', '21', '23', '29', '30', '31', '33', '34', '36', '37', '38', '39', '45', '46', '54', '61', '63', '65', '68', '70', '80', '82', '83', '84', '85', '86', '87', '88', '89', '90']
+supportedCaseList = ['0', '1', '2', '4', '8', '13', '20', '21', '23', '29', '30', '31', '32', '33', '34', '36', '37', '38', '39', '45', '46', '54', '61', '63', '65', '68', '70', '80', '82', '83', '84', '85', '86', '87', '88', '89', '90']
# Create folders based on testType and profilingOption
if testType == 1 and profilingOption == "YES":
diff --git a/utilities/test_suite/HOST/Tensor_host.cpp b/utilities/test_suite/HOST/Tensor_host.cpp
index 051d4045a..f85ea5704 100644
--- a/utilities/test_suite/HOST/Tensor_host.cpp
+++ b/utilities/test_suite/HOST/Tensor_host.cpp
@@ -740,6 +740,47 @@ int main(int argc, char **argv)
break;
}
+ case 32:
+ {
+ testCaseName = "erase";
+ Rpp32u boxesInEachImage = 3;
+ Rpp32f colorBuffer[batchSize * boxesInEachImage];
+ RpptRoiLtrb anchorBoxInfoTensor[batchSize * boxesInEachImage];
+ Rpp32u numOfBoxes[batchSize];
+ int idx;
+
+ init_erase(batchSize, boxesInEachImage, numOfBoxes, anchorBoxInfoTensor, roiTensorPtrSrc, srcDescPtr->c, colorBuffer, inputBitDepth);
+
+ startWallTime = omp_get_wtime();
+ startCpuTime = clock();
+ if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5)
+ rppt_erase_host(input, srcDescPtr, output, dstDescPtr, anchorBoxInfoTensor, colorBuffer, numOfBoxes, roiTensorPtrSrc, roiTypeSrc, handle);
+ else
+ missingFuncFlag = 1;
+
+ break;
+ }
+ case 33:
+ {
+ testCaseName = "crop_and_patch";
+
+ for (i = 0; i < batchSize; i++)
+ {
+ cropRoi[i].xywhROI.xy.x = patchRoi[i].xywhROI.xy.x = roiList[0];
+ cropRoi[i].xywhROI.xy.y = patchRoi[i].xywhROI.xy.y = roiList[1];
+ cropRoi[i].xywhROI.roiWidth = patchRoi[i].xywhROI.roiWidth = roiWidthList[i];
+ cropRoi[i].xywhROI.roiHeight = patchRoi[i].xywhROI.roiHeight = roiHeightList[i];
+ }
+
+ startWallTime = omp_get_wtime();
+ startCpuTime = clock();
+ if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5)
+ rppt_crop_and_patch_host(input, input_second, srcDescPtr, output, dstDescPtr, roiTensorPtrDst, cropRoi, patchRoi, roiTypeSrc, handle);
+ else
+ missingFuncFlag = 1;
+
+ break;
+ }
case 34:
{
testCaseName = "lut";
@@ -803,27 +844,6 @@ int main(int argc, char **argv)
break;
}
- case 33:
- {
- testCaseName = "crop_and_patch";
-
- for (i = 0; i < batchSize; i++)
- {
- cropRoi[i].xywhROI.xy.x = patchRoi[i].xywhROI.xy.x = roiList[0];
- cropRoi[i].xywhROI.xy.y = patchRoi[i].xywhROI.xy.y = roiList[1];
- cropRoi[i].xywhROI.roiWidth = patchRoi[i].xywhROI.roiWidth = roiWidthList[i];
- cropRoi[i].xywhROI.roiHeight = patchRoi[i].xywhROI.roiHeight = roiHeightList[i];
- }
-
- startWallTime = omp_get_wtime();
- startCpuTime = clock();
- if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5)
- rppt_crop_and_patch_host(input, input_second, srcDescPtr, output, dstDescPtr, roiTensorPtrDst, cropRoi, patchRoi, roiTypeSrc, handle);
- else
- missingFuncFlag = 1;
-
- break;
- }
case 37:
{
testCaseName = "crop";
diff --git a/utilities/test_suite/HOST/runTests.py b/utilities/test_suite/HOST/runTests.py
index 6f8eb7f70..f83f981fa 100644
--- a/utilities/test_suite/HOST/runTests.py
+++ b/utilities/test_suite/HOST/runTests.py
@@ -272,7 +272,7 @@ def rpp_test_suite_parser_and_validator():
subprocess.run(["make", "-j16"], cwd=".") # nosec
# List of cases supported
-supportedCaseList = ['0', '1', '2', '4', '8', '13', '20', '21', '23', '29', '30', '31', '33', '34', '36', '37', '38', '39', '45', '46', '54', '61', '63', '65', '68', '70', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90']
+supportedCaseList = ['0', '1', '2', '4', '8', '13', '20', '21', '23', '29', '30', '31', '32', '33', '34', '36', '37', '38', '39', '45', '46', '54', '61', '63', '65', '68', '70', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90']
print("\n\n\n\n\n")
print("##########################################################################################")
diff --git a/utilities/test_suite/REFERENCE_OUTPUT/erase/erase_u8_Tensor.bin b/utilities/test_suite/REFERENCE_OUTPUT/erase/erase_u8_Tensor.bin
new file mode 100644
index 000000000..b3f0894ee
Binary files /dev/null and b/utilities/test_suite/REFERENCE_OUTPUT/erase/erase_u8_Tensor.bin differ
diff --git a/utilities/test_suite/rpp_test_suite_common.h b/utilities/test_suite/rpp_test_suite_common.h
index 5623c1b7b..5e1528c81 100644
--- a/utilities/test_suite/rpp_test_suite_common.h
+++ b/utilities/test_suite/rpp_test_suite_common.h
@@ -81,6 +81,7 @@ std::map augmentationMap =
{29, "water"},
{30, "non_linear_blend"},
{31, "color_cast"},
+ {32, "erase"},
{33, "crop_and_patch"},
{34, "lut"},
{36, "color_twist"},
@@ -1396,3 +1397,77 @@ void init_slice(RpptGenericDescPtr descriptorPtr3D, RpptROIPtr roiPtrSrc, Rpp32u
}
}
}
+
+// Erase Region initializer for unit and performance testing
+void inline init_erase(int batchSize, int boxesInEachImage, Rpp32u* numOfBoxes, RpptRoiLtrb* anchorBoxInfoTensor, RpptROIPtr roiTensorPtrSrc, int channels, Rpp32f *colorBuffer, int inputBitDepth)
+{
+ Rpp8u *colors8u = reinterpret_cast(colorBuffer);
+ Rpp16f *colors16f = reinterpret_cast(colorBuffer);
+ Rpp32f *colors32f = colorBuffer;
+ Rpp8s *colors8s = reinterpret_cast(colorBuffer);
+ for(int i = 0; i < batchSize; i++)
+ {
+ numOfBoxes[i] = boxesInEachImage;
+ int idx = boxesInEachImage * i;
+
+ anchorBoxInfoTensor[idx].lt.x = 0.125 * roiTensorPtrSrc[i].xywhROI.roiWidth;
+ anchorBoxInfoTensor[idx].lt.y = 0.125 * roiTensorPtrSrc[i].xywhROI.roiHeight;
+ anchorBoxInfoTensor[idx].rb.x = 0.375 * roiTensorPtrSrc[i].xywhROI.roiWidth;
+ anchorBoxInfoTensor[idx].rb.y = 0.375 * roiTensorPtrSrc[i].xywhROI.roiHeight;
+
+ idx++;
+ anchorBoxInfoTensor[idx].lt.x = 0.125 * roiTensorPtrSrc[i].xywhROI.roiWidth;
+ anchorBoxInfoTensor[idx].lt.y = 0.625 * roiTensorPtrSrc[i].xywhROI.roiHeight;
+ anchorBoxInfoTensor[idx].rb.x = 0.875 * roiTensorPtrSrc[i].xywhROI.roiWidth;
+ anchorBoxInfoTensor[idx].rb.y = 0.875 * roiTensorPtrSrc[i].xywhROI.roiHeight;
+
+ idx++;
+ anchorBoxInfoTensor[idx].lt.x = 0.75 * roiTensorPtrSrc[i].xywhROI.roiWidth;
+ anchorBoxInfoTensor[idx].lt.y = 0.125 * roiTensorPtrSrc[i].xywhROI.roiHeight;
+ anchorBoxInfoTensor[idx].rb.x = 0.875 * roiTensorPtrSrc[i].xywhROI.roiWidth;
+ anchorBoxInfoTensor[idx].rb.y = 0.5 * roiTensorPtrSrc[i].xywhROI.roiHeight;
+
+ if(channels == 3)
+ {
+ int idx = boxesInEachImage * 3 * i;
+ colorBuffer[idx] = 0;
+ colorBuffer[idx + 1] = 0;
+ colorBuffer[idx + 2] = 240;
+ colorBuffer[idx + 3] = 0;
+ colorBuffer[idx + 4] = 240;
+ colorBuffer[idx + 5] = 0;
+ colorBuffer[idx + 6] = 240;
+ colorBuffer[idx + 7] = 0;
+ colorBuffer[idx + 8] = 0;
+ for (int j = 0; j < 9; j++)
+ {
+ if (!inputBitDepth)
+ colors8u[idx + j] = (Rpp8u)(colorBuffer[idx + j]);
+ else if (inputBitDepth == 1)
+ colors16f[idx + j] = (Rpp16f)(colorBuffer[idx + j] * ONE_OVER_255);
+ else if (inputBitDepth == 2)
+ colors32f[idx + j] = (Rpp32f)(colorBuffer[idx + j] * ONE_OVER_255);
+ else if (inputBitDepth == 5)
+ colors8s[idx + j] = (Rpp8s)(colorBuffer[idx + j] - 128);
+ }
+ }
+ else
+ {
+ int idx = boxesInEachImage * i;
+ colorBuffer[idx] = 240;
+ colorBuffer[idx + 1] = 120;
+ colorBuffer[idx + 2] = 60;
+ for (int j = 0; j < 3; j++)
+ {
+ if (!inputBitDepth)
+ colors8u[idx + j] = (Rpp8u)(colorBuffer[idx + j]);
+ else if (inputBitDepth == 1)
+ colors16f[idx + j] = (Rpp16f)(colorBuffer[idx + j] * ONE_OVER_255);
+ else if (inputBitDepth == 2)
+ colors32f[idx + j] = (Rpp32f)(colorBuffer[idx + j] * ONE_OVER_255);
+ else if (inputBitDepth == 5)
+ colors8s[idx + j] = (Rpp8s)(colorBuffer[idx + j] - 128);
+ }
+ }
+ }
+}