diff --git a/docs/data/doxygenOutputs/effects_augmentations_erase_img150x150.png b/docs/data/doxygenOutputs/effects_augmentations_erase_img150x150.png
new file mode 100644
index 000000000..136eca9bb
Binary files /dev/null and b/docs/data/doxygenOutputs/effects_augmentations_erase_img150x150.png differ
diff --git a/include/rppt_tensor_effects_augmentations.h b/include/rppt_tensor_effects_augmentations.h
index 365c22545..708f318bf 100644
--- a/include/rppt_tensor_effects_augmentations.h
+++ b/include/rppt_tensor_effects_augmentations.h
@@ -509,6 +509,56 @@ RppStatus rppt_gaussian_noise_voxel_host(RppPtr_t srcPtr, RpptGenericDescPtr src
 RppStatus rppt_gaussian_noise_voxel_gpu(RppPtr_t srcPtr, RpptGenericDescPtr srcDescPtr, RppPtr_t dstPtr, RpptGenericDescPtr dstDescPtr, Rpp32f *meanTensor, Rpp32f *stdDevTensor, Rpp32u seed, RpptROI3DPtr roiGenericPtrSrc, RpptRoi3DType roiType, rppHandle_t rppHandle);
 #endif // GPU_SUPPORT
 
+/*! \brief Erase augmentation on HOST backend for a NCHW/NHWC layout tensor
+ * \details This function erases one or more user defined regions from an image, for a batch of RGB(3 channel) / greyscale(1 channel) images with an NHWC/NCHW tensor layout.<br>
+ *          srcPtr depth ranges - Rpp8u (0 to 255), Rpp16f (0 to 1), Rpp32f (0 to 1), Rpp8s (-128 to 127).
+ *          dstPtr depth ranges - Will be same depth as srcPtr.
+ * \image html img150x150.png Sample Input
+ * \image html effects_augmentations_erase_img150x150.png Sample Output
+ * \param [in] srcPtr source tensor in HOST memory
+ * \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = 1/3)
+ * \param [out] dstPtr destination tensor in HOST memory
+ * \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = same as that of srcDescPtr)
+ * \param [in] anchorBoxInfoTensor anchorBoxInfo values of type RpptRoiLtrb for each erase-region inside each image in the batch. Restrictions -
+            - 0 <= anchorBoxInfo[i] < respective image width/height
+            - Erase-region anchor boxes on each image given by the user must not overlap
+ * \param [in] colorsTensor RGB values to use for each erase-region inside each image in the batch. (colors[i] will have range equivalent of srcPtr)
+ * \param [in] numBoxesTensor number of erase-regions per image, for each image in the batch. (numBoxesTensor[n] >= 0)
+ * \param [in] roiTensorSrc ROI data in HOST memory, for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y))
+ * \param [in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB)
+ * \param [in] rppHandle RPP HOST handle created with <tt>\ref rppCreateWithBatchSize()</tt>
+ * \return A <tt> \ref RppStatus</tt> enumeration.
+ * \retval RPP_SUCCESS Successful completion.
+ * \retval RPP_ERROR* Unsuccessful completion.
+ */
+RppStatus rppt_erase_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, RpptRoiLtrb *anchorBoxInfoTensor, RppPtr_t colorsTensor, Rpp32u *numBoxesTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, rppHandle_t rppHandle);
+
+#ifdef GPU_SUPPORT
+/*! \brief Erase augmentation on HIP backend for a NCHW/NHWC layout tensor
+ * \details This function erases one or more user defined regions from an image, for a batch of RGB(3 channel) / greyscale(1 channel) images with an NHWC/NCHW tensor layout.<br>
+ *          srcPtr depth ranges - Rpp8u (0 to 255), Rpp16f (0 to 1), Rpp32f (0 to 1), Rpp8s (-128 to 127).
+ *          dstPtr depth ranges - Will be same depth as srcPtr.
+ * \image html img150x150.jpg Sample Input
+ * \image html effects_augmentations_erase_img150x150.jpg Sample Output
+ * \param [in] srcPtr source tensor in HIP memory
+ * \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = 1/3)
+ * \param [out] dstPtr destination tensor in HIP memory
+ * \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = same as that of srcDescPtr)
+ * \param [in] anchorBoxInfoTensor anchorBoxInfo values of type RpptRoiLtrb for each erase-region inside each image in the batch. Restrictions -
+            - 0 <= anchorBoxInfo[i] < respective image width/height
+            - Erase-region anchor boxes on each image given by the user must not overlap
+ * \param [in] colorsTensor RGB values to use for each erase-region inside each image in the batch. (colors[i] will have range equivalent of srcPtr)
+ * \param [in] numBoxesTensor number of erase-regions per image, for each image in the batch. (numBoxesTensor[n] >= 0)
+ * \param [in] roiTensorSrc ROI data in HIP memory, for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y))
+ * \param [in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB)
+ * \param [in] rppHandle RPP HIP handle created with <tt>\ref rppCreateWithStreamAndBatchSize()</tt>
+ * \return A <tt> \ref RppStatus</tt> enumeration.
+ * \retval RPP_SUCCESS Successful completion.
+ * \retval RPP_ERROR* Unsuccessful completion.
+ */
+RppStatus rppt_erase_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, RpptRoiLtrb *anchorBoxInfoTensor, RppPtr_t colorsTensor, Rpp32u *numBoxesTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, rppHandle_t rppHandle);
+#endif // GPU_SUPPORT
+
 /*! @}
  */
 
diff --git a/src/include/hip/rpp_hip_common.hpp b/src/include/hip/rpp_hip_common.hpp
index 169ba8bd5..a5cd5972c 100644
--- a/src/include/hip/rpp_hip_common.hpp
+++ b/src/include/hip/rpp_hip_common.hpp
@@ -82,6 +82,7 @@ typedef union { uchar uc1[8];   uchar4 uc4[2];
 typedef union { uchar uc1[24];  uchar4 uc4[6];  uchar3 uc3[8];    d_uchar8 uc8[3];              }   d_uchar24;
 
 // schar
+typedef struct { schar sc1[3];                                                                  }   d_schar3_s;
 typedef struct { schar sc1[8];                                                                  }   d_schar8_s;
 typedef struct { d_schar8_s sc8[3];                                                             }   d_schar24_s;
 
diff --git a/src/modules/cpu/host_tensor_effects_augmentations.hpp b/src/modules/cpu/host_tensor_effects_augmentations.hpp
index 8d50fa213..9388ed6bd 100644
--- a/src/modules/cpu/host_tensor_effects_augmentations.hpp
+++ b/src/modules/cpu/host_tensor_effects_augmentations.hpp
@@ -34,5 +34,6 @@ SOFTWARE.
 #include "kernel/water.hpp"
 #include "kernel/ricap.hpp"
 #include "kernel/vignette.hpp"
+#include "kernel/erase.hpp"
 
 #endif // HOST_TENSOR_EFFECTS_AUGMENTATIONS_HPP
diff --git a/src/modules/cpu/kernel/erase.hpp b/src/modules/cpu/kernel/erase.hpp
new file mode 100644
index 000000000..487b0b56d
--- /dev/null
+++ b/src/modules/cpu/kernel/erase.hpp
@@ -0,0 +1,332 @@
+/*
+MIT License
+
+Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+*/
+
+#include "rppdefs.h"
+#include "rpp_cpu_simd.hpp"
+#include "rpp_cpu_common.hpp"
+
+template <typename T>
+RppStatus erase_host_tensor(T *srcPtr,
+                            RpptDescPtr srcDescPtr,
+                            T *dstPtr,
+                            RpptDescPtr dstDescPtr,
+                            RpptRoiLtrb *anchorBoxInfoTensor,
+                            T *colorsTensor,
+                            Rpp32u *numBoxesTensor,
+                            RpptROIPtr roiTensorPtrSrc,
+                            RpptRoiType roiType,
+                            RppLayoutParams layoutParams,
+                            rpp::Handle& handle)
+{
+    RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h};
+    Rpp32u numThreads = handle.GetNumThreads();
+
+    omp_set_dynamic(0);
+#pragma omp parallel for num_threads(numThreads)
+    for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++)
+    {
+        RpptROI roi;
+        RpptROIPtr roiPtrInput = &roiTensorPtrSrc[batchCount];
+        compute_roi_validation_host(roiPtrInput, &roi, &roiDefault, roiType);
+
+        Rpp32u numBoxes = numBoxesTensor[batchCount];
+        RpptRoiLtrb *anchorBoxInfo = anchorBoxInfoTensor + batchCount * numBoxes;
+        T *colors = colorsTensor + batchCount * numBoxes * srcDescPtr->c;
+
+        T *srcPtrImage, *dstPtrImage;
+        srcPtrImage = srcPtr + batchCount * srcDescPtr->strides.nStride;
+        dstPtrImage = dstPtr + batchCount * dstDescPtr->strides.nStride;
+
+        T *srcPtrChannel, *dstPtrChannel;
+        srcPtrChannel = srcPtrImage + (roi.xywhROI.xy.y * srcDescPtr->strides.hStride) + (roi.xywhROI.xy.x * layoutParams.bufferMultiplier);
+        dstPtrChannel = dstPtrImage;
+        T userPixel3[3];
+        Rpp32u bufferLength = roi.xywhROI.roiWidth * layoutParams.bufferMultiplier * sizeof(T);
+
+        // Erase with fused output-layout toggle (NHWC -> NCHW)
+        if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NCHW))
+        {
+            T *srcPtrRow, *dstPtrRowR, *dstPtrRowG, *dstPtrRowB;
+            srcPtrRow = srcPtrChannel;
+            dstPtrRowR = dstPtrChannel;
+            dstPtrRowG = dstPtrRowR + dstDescPtr->strides.cStride;
+            dstPtrRowB = dstPtrRowG + dstDescPtr->strides.cStride;
+
+            for(int i = 0; i < roi.xywhROI.roiHeight; i++)
+            {
+                T *srcPtrTemp, *dstPtrTempR, *dstPtrTempG, *dstPtrTempB;
+                srcPtrTemp = srcPtrRow;
+                dstPtrTempR = dstPtrRowR;
+                dstPtrTempG = dstPtrRowG;
+                dstPtrTempB = dstPtrRowB;
+                bool isErase = false;
+                Rpp32u bufferLength = 0;
+                T userPixelR, userPixelG, userPixelB;
+
+                for (int j = 0; j < roi.xywhROI.roiWidth;)
+                {
+                    for(int count = 0; count < numBoxes; count++)
+                    {
+                        Rpp32u x1 = static_cast<Rpp32u>(RPPPRANGECHECK(anchorBoxInfo[count].lt.x, roi.xywhROI.xy.x, roi.xywhROI.roiWidth));
+                        Rpp32u y1 = static_cast<Rpp32u>(RPPPRANGECHECK(anchorBoxInfo[count].lt.y, roi.xywhROI.xy.y, roi.xywhROI.roiHeight));
+                        Rpp32u x2 = static_cast<Rpp32u>(RPPPRANGECHECK(anchorBoxInfo[count].rb.x, x1, roi.xywhROI.roiWidth));
+                        Rpp32u y2 = static_cast<Rpp32u>(RPPPRANGECHECK(anchorBoxInfo[count].rb.y, y1, roi.xywhROI.roiHeight));
+                        Rpp32u countMul3 = count * 3;
+                        userPixelR = colors[countMul3];
+                        userPixelG = colors[countMul3 + 1];
+                        userPixelB = colors[countMul3 + 2];
+                        if(i >= y1 && i <= y2 && j >= x1 && j <= x2)
+                        {
+                            isErase = true;
+                            bufferLength = x2 - x1 + 1;
+                            break;
+                        }
+                    }
+                    if(isErase && bufferLength)
+                    {
+                        std::fill_n(dstPtrTempR, bufferLength, userPixelR);
+                        std::fill_n(dstPtrTempG, bufferLength, userPixelG);
+                        std::fill_n(dstPtrTempB, bufferLength, userPixelB);
+                        srcPtrTemp += 3 * bufferLength;
+                        j += bufferLength;
+                        dstPtrTempR += bufferLength;
+                        dstPtrTempG += bufferLength;
+                        dstPtrTempB += bufferLength;
+                        isErase = false;
+                    }
+                    else
+                    {
+                        *dstPtrTempR++ = srcPtrTemp[0];
+                        *dstPtrTempG++ = srcPtrTemp[1];
+                        *dstPtrTempB++ = srcPtrTemp[2];
+                        srcPtrTemp += 3;
+                        j++;
+                    }
+                }
+
+                srcPtrRow += srcDescPtr->strides.hStride;
+                dstPtrRowR += dstDescPtr->strides.hStride;
+                dstPtrRowG += dstDescPtr->strides.hStride;
+                dstPtrRowB += dstDescPtr->strides.hStride;
+            }
+        }
+
+        // Erase with fused output-layout toggle (NCHW -> NHWC)
+        else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NHWC))
+        {
+            T *srcPtrRowR, *srcPtrRowG, *srcPtrRowB, *dstPtrRow;
+            srcPtrRowR = srcPtrChannel;
+            srcPtrRowG = srcPtrRowR + srcDescPtr->strides.cStride;
+            srcPtrRowB = srcPtrRowG + srcDescPtr->strides.cStride;
+            dstPtrRow = dstPtrChannel;
+
+            for(int i = 0; i < roi.xywhROI.roiHeight; i++)
+            {
+                T *srcPtrTempR, *srcPtrTempG, *srcPtrTempB, *dstPtrTemp;
+                srcPtrTempR = srcPtrRowR;
+                srcPtrTempG = srcPtrRowG;
+                srcPtrTempB = srcPtrRowB;
+                dstPtrTemp = dstPtrRow;
+                bool isErase = false;
+                Rpp32u bufferLengthPerChannel = 0;
+
+                for (int j = 0; j < roi.xywhROI.roiWidth;)
+                {
+                    for(int count = 0; count < numBoxes; count++)
+                    {
+                        Rpp32u x1 = static_cast<Rpp32u>(RPPPRANGECHECK(anchorBoxInfo[count].lt.x, roi.xywhROI.xy.x, roi.xywhROI.roiWidth));
+                        Rpp32u y1 = static_cast<Rpp32u>(RPPPRANGECHECK(anchorBoxInfo[count].lt.y, roi.xywhROI.xy.y, roi.xywhROI.roiHeight));
+                        Rpp32u x2 = static_cast<Rpp32u>(RPPPRANGECHECK(anchorBoxInfo[count].rb.x, x1, roi.xywhROI.roiWidth));
+                        Rpp32u y2 = static_cast<Rpp32u>(RPPPRANGECHECK(anchorBoxInfo[count].rb.y, y1, roi.xywhROI.roiHeight));
+                        Rpp32u countMul3 = count * 3;
+                        userPixel3[0] = colors[countMul3];
+                        userPixel3[1] = colors[countMul3 + 1];
+                        userPixel3[2] = colors[countMul3 + 2];
+
+                        if(i >= y1 && i <= y2 && j >= x1 && j <= x2)
+                        {
+                            isErase = true;
+                            bufferLengthPerChannel = x2 - x1 + 1;
+                            break;
+                        }
+                    }
+                    if(isErase && bufferLengthPerChannel)
+                    {
+                        for (int k = 0; k < bufferLengthPerChannel; k++)
+                        {
+                            memcpy(dstPtrTemp, userPixel3, sizeof(T) * 3);
+                            dstPtrTemp += 3;
+                        }
+                        j += bufferLengthPerChannel;
+                        srcPtrTempR += bufferLengthPerChannel;
+                        srcPtrTempG += bufferLengthPerChannel;
+                        srcPtrTempB += bufferLengthPerChannel;
+                        isErase = false;
+                    }
+                    else
+                    {
+                        dstPtrTemp[0] = *srcPtrTempR++;
+                        dstPtrTemp[1] = *srcPtrTempG++;
+                        dstPtrTemp[2] = *srcPtrTempB++;
+                        dstPtrTemp += 3;
+                        j++;
+                    }
+                }
+
+                srcPtrRowR += srcDescPtr->strides.hStride;
+                srcPtrRowG += srcDescPtr->strides.hStride;
+                srcPtrRowB += srcDescPtr->strides.hStride;
+                dstPtrRow += dstDescPtr->strides.hStride;
+            }
+        }
+
+        // Erase without fused output-layout toggle 3 channel(NCHW -> NCHW)
+        else if ((srcDescPtr->c == 3) && (srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NCHW))
+        {
+            // To copy ROI region in Image
+            for(int c = 0; c < layoutParams.channelParam; c++)
+            {
+                T *srcPtrRow, *dstPtrRow;
+                srcPtrRow = srcPtrChannel;
+                dstPtrRow = dstPtrChannel;
+
+                for(int i = 0; i < roi.xywhROI.roiHeight; i++)
+                {
+                    memcpy(dstPtrRow, srcPtrRow, bufferLength);
+                    srcPtrRow += srcDescPtr->strides.hStride;
+                    dstPtrRow += dstDescPtr->strides.hStride;
+                }
+
+                srcPtrChannel += srcDescPtr->strides.cStride;
+                dstPtrChannel += dstDescPtr->strides.cStride;
+            }
+
+            for(int count = 0; count < numBoxes; count++)
+            {
+                Rpp32u x1 = static_cast<Rpp32u>(RPPPRANGECHECK(anchorBoxInfo[count].lt.x, roi.xywhROI.xy.x, roi.xywhROI.roiWidth));
+                Rpp32u y1 = static_cast<Rpp32u>(RPPPRANGECHECK(anchorBoxInfo[count].lt.y, roi.xywhROI.xy.y, roi.xywhROI.roiHeight));
+                Rpp32u x2 = static_cast<Rpp32u>(RPPPRANGECHECK(anchorBoxInfo[count].rb.x, x1, roi.xywhROI.roiWidth));
+                Rpp32u y2 = static_cast<Rpp32u>(RPPPRANGECHECK(anchorBoxInfo[count].rb.y, y1, roi.xywhROI.roiHeight));
+
+                Rpp32u pixelLocation = (y1 * srcDescPtr->strides.hStride) + (x1 * srcDescPtr->strides.wStride);
+                Rpp32u boxHeight = y2 - y1 + 1;
+                Rpp32u boxWidth = x2 - x1 + 1;
+
+                T *dstPtrTempR, *dstPtrTempG, *dstPtrTempB;
+                dstPtrTempR = dstPtrImage + pixelLocation;
+                dstPtrTempG = dstPtrTempR + dstDescPtr->strides.cStride;
+                dstPtrTempB = dstPtrTempG + dstDescPtr->strides.cStride;
+                Rpp32u countMul3 = count * 3;
+                T userPixelR = colors[countMul3];
+                T userPixelG = colors[countMul3 + 1];
+                T userPixelB = colors[countMul3 + 2];
+                for (int i = 0; i < boxHeight; i++)
+                {
+                    std::fill_n(dstPtrTempR, boxWidth, userPixelR);
+                    std::fill_n(dstPtrTempG, boxWidth, userPixelG);
+                    std::fill_n(dstPtrTempB, boxWidth, userPixelB);
+                    dstPtrTempR += dstDescPtr->strides.hStride;
+                    dstPtrTempG += dstDescPtr->strides.hStride;
+                    dstPtrTempB += dstDescPtr->strides.hStride;
+                }
+            }
+        }
+        // Erase without fused output-layout toggle 1 channel(NCHW -> NCHW)
+        else if ((srcDescPtr->c == 1) && (srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NCHW))
+        {
+            // To copy ROI region in Image
+            for(int i = 0; i < roi.xywhROI.roiHeight; i++)
+            {
+                memcpy(dstPtrChannel, srcPtrChannel, bufferLength);
+                srcPtrChannel += srcDescPtr->strides.hStride;
+                dstPtrChannel += dstDescPtr->strides.hStride;
+            }
+
+            for (int count = 0; count < numBoxes; count++)
+            {
+                Rpp32u x1 = (Rpp32u)RPPPRANGECHECK(anchorBoxInfo[count].lt.x, roi.xywhROI.xy.x, roi.xywhROI.roiWidth);
+                Rpp32u y1 = (Rpp32u)RPPPRANGECHECK(anchorBoxInfo[count].lt.y, roi.xywhROI.xy.y, roi.xywhROI.roiHeight);
+                Rpp32u x2 = (Rpp32u)RPPPRANGECHECK(anchorBoxInfo[count].rb.x, x1, roi.xywhROI.roiWidth);
+                Rpp32u y2 = (Rpp32u)RPPPRANGECHECK(anchorBoxInfo[count].rb.y, y1, roi.xywhROI.roiHeight);
+
+                Rpp32u pixelLocation = (y1 * srcDescPtr->strides.hStride) + (x1 * srcDescPtr->strides.wStride);
+                Rpp32u boxHeight = y2 - y1 + 1;
+                Rpp32u boxWidth = x2 - x1 + 1;
+
+                T *dstPtrTemp;
+                dstPtrTemp = dstPtrImage + pixelLocation;
+
+                T userPixel = colors[count];
+                for (int i = 0; i < boxHeight; i++)
+                {
+                    std::fill_n(dstPtrTemp, boxWidth, userPixel);
+                    dstPtrTemp += dstDescPtr->strides.hStride;
+                }
+            }
+        }
+
+        // Erase without fused output-layout toggle 3 channel(NHWC -> NHWC)
+        else
+        {
+            // To copy ROI region in Image
+            for(int i = 0; i < roi.xywhROI.roiHeight; i++)
+            {
+                memcpy(dstPtrChannel, srcPtrChannel, bufferLength);
+                srcPtrChannel += srcDescPtr->strides.hStride;
+                dstPtrChannel += dstDescPtr->strides.hStride;
+            }
+
+            for(int count = 0; count < numBoxes; count++)
+            {
+                Rpp32u x1 = static_cast<Rpp32u>(RPPPRANGECHECK(anchorBoxInfo[count].lt.x, roi.xywhROI.xy.x, roi.xywhROI.roiWidth));
+                Rpp32u y1 = static_cast<Rpp32u>(RPPPRANGECHECK(anchorBoxInfo[count].lt.y, roi.xywhROI.xy.y, roi.xywhROI.roiHeight));
+                Rpp32u x2 = static_cast<Rpp32u>(RPPPRANGECHECK(anchorBoxInfo[count].rb.x, x1, roi.xywhROI.roiWidth));
+                Rpp32u y2 = static_cast<Rpp32u>(RPPPRANGECHECK(anchorBoxInfo[count].rb.y, y1, roi.xywhROI.roiHeight));
+                Rpp32u countMul3 = count * 3;
+                userPixel3[0] = colors[countMul3];
+                userPixel3[1] = colors[countMul3 + 1];
+                userPixel3[2] = colors[countMul3 + 2];
+
+                Rpp32u pixelLocation = (y1 * srcDescPtr->strides.hStride) + (x1 * srcDescPtr->strides.wStride);
+                Rpp32u boxHeight = y2 - y1 + 1;
+                Rpp32u boxWidth = x2 - x1 + 1;
+                T *dstPtrTemp;
+                dstPtrTemp = dstPtrImage + pixelLocation;
+
+                for (int i = 0; i < boxHeight; i++)
+                {
+                    T *dstPtrRow = dstPtrTemp;
+                    for (int j = 0; j < boxWidth; j++)
+                    {
+                        memcpy(dstPtrRow, userPixel3, sizeof(T) * 3);
+                        dstPtrRow += 3;
+                    }
+                    dstPtrTemp += dstDescPtr->strides.hStride;
+                }
+            }
+        }
+    }
+
+    return RPP_SUCCESS;
+}
diff --git a/src/modules/hip/hip_tensor_effects_augmentations.hpp b/src/modules/hip/hip_tensor_effects_augmentations.hpp
index 25cd9a863..abdfd30ab 100644
--- a/src/modules/hip/hip_tensor_effects_augmentations.hpp
+++ b/src/modules/hip/hip_tensor_effects_augmentations.hpp
@@ -34,5 +34,6 @@ SOFTWARE.
 #include "kernel/water.hpp"
 #include "kernel/ricap.hpp"
 #include "kernel/vignette.hpp"
+#include "kernel/erase.hpp"
 
 #endif // HIP_TENSOR_EFFECTS_AUGMENTATIONS_HPP
diff --git a/src/modules/hip/kernel/erase.hpp b/src/modules/hip/kernel/erase.hpp
new file mode 100644
index 000000000..2591b53f0
--- /dev/null
+++ b/src/modules/hip/kernel/erase.hpp
@@ -0,0 +1,278 @@
+#include <hip/hip_runtime.h>
+#include "rpp_hip_common.hpp"
+
+// -------------------- Set 0 - Erase main kernels --------------------
+template <typename T, typename U>
+__global__ void erase_pkd_hip_tensor(T *dstPtr,
+                                     uint2 dstStridesNH,
+                                     RpptRoiLtrb *anchorBoxInfoTensor,
+                                     U *colorsTensor,
+                                     Rpp32u *numBoxesTensor,
+                                     RpptROIPtr roiTensorPtrSrc)
+{
+    int id_x = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
+    int id_y = hipBlockIdx_y * hipBlockDim_y + hipThreadIdx_y;
+    int id_z = hipBlockIdx_z * hipBlockDim_z + hipThreadIdx_z;
+
+    if ((id_y >= roiTensorPtrSrc[id_z].xywhROI.roiHeight) || (id_x >= roiTensorPtrSrc[id_z].xywhROI.roiWidth))
+        return;
+
+    Rpp32u numBoxes = numBoxesTensor[id_z];
+    uint dstIdx = (id_z * dstStridesNH.x) + (id_y * dstStridesNH.y) + id_x * 3;
+
+    // check if the co-ordinates is within any user defined box
+    for (int i = 0; i < numBoxes; i++)
+    {
+        int temp = (id_z * numBoxes) + i;
+        if (id_x >= anchorBoxInfoTensor[temp].lt.x && id_x <= anchorBoxInfoTensor[temp].rb.x && id_y >= anchorBoxInfoTensor[temp].lt.y && id_y <= anchorBoxInfoTensor[temp].rb.y)
+        {
+            *reinterpret_cast<U *>(dstPtr + dstIdx) = static_cast<U>(colorsTensor[temp]);
+            break;
+        }
+    }
+}
+
+template <typename T>
+__global__ void erase_pln_hip_tensor(T *dstPtr,
+                                     uint3 dstStridesNCH,
+                                     RpptRoiLtrb *anchorBoxInfoTensor,
+                                     T *colorsTensor,
+                                     Rpp32u *numBoxesTensor,
+                                     RpptROIPtr roiTensorPtrSrc)
+{
+    int id_x = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
+    int id_y = hipBlockIdx_y * hipBlockDim_y + hipThreadIdx_y;
+    int id_z = hipBlockIdx_z * hipBlockDim_z + hipThreadIdx_z;
+
+    if ((id_y >= roiTensorPtrSrc[id_z].xywhROI.roiHeight) || (id_x >= roiTensorPtrSrc[id_z].xywhROI.roiWidth))
+        return;
+
+    Rpp32u numBoxes = numBoxesTensor[id_z];
+    uint dstIdx = (id_z * dstStridesNCH.x) + (id_y * dstStridesNCH.z) + id_x;
+
+    // check if the co-ordinates is within any user defined box
+    for (int i = 0; i < numBoxes; i++)
+    {
+        int temp = (id_z * numBoxes) + i;
+        if (id_x >= anchorBoxInfoTensor[temp].lt.x && id_x <= anchorBoxInfoTensor[temp].rb.x && id_y >= anchorBoxInfoTensor[temp].lt.y && id_y <= anchorBoxInfoTensor[temp].rb.y)
+        {
+            *static_cast<T *>((dstPtr + dstIdx)) = colorsTensor[temp];
+            break;
+        }
+    }
+}
+
+template <typename T>
+__global__ void erase_pln3_hip_tensor(T *dstPtr,
+                                      uint3 dstStridesNCH,
+                                      RpptRoiLtrb *anchorBoxInfoTensor,
+                                      T *colorsTensor,
+                                      Rpp32u *numBoxesTensor,
+                                      RpptROIPtr roiTensorPtrSrc)
+{
+    int id_x = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
+    int id_y = hipBlockIdx_y * hipBlockDim_y + hipThreadIdx_y;
+    int id_z = hipBlockIdx_z * hipBlockDim_z + hipThreadIdx_z;
+
+    if ((id_y >= roiTensorPtrSrc[id_z].xywhROI.roiHeight) || (id_x >= roiTensorPtrSrc[id_z].xywhROI.roiWidth))
+        return;
+
+    Rpp32u numBoxes = numBoxesTensor[id_z];
+    uint dstIdx = (id_z * dstStridesNCH.x) + (id_y * dstStridesNCH.z) + id_x;
+
+    // check if the co-ordinates is within any user defined box
+    for (int i = 0; i < numBoxes; i++)
+    {
+        int temp = (id_z * numBoxes) + i;
+        if (id_x >= anchorBoxInfoTensor[temp].lt.x && id_x <= anchorBoxInfoTensor[temp].rb.x && id_y >= anchorBoxInfoTensor[temp].lt.y && id_y <= anchorBoxInfoTensor[temp].rb.y)
+        {
+            temp *= 3;
+            *static_cast<T *>(dstPtr + dstIdx) = colorsTensor[temp];
+            dstIdx += dstStridesNCH.y;
+            *static_cast<T *>(dstPtr + dstIdx) = colorsTensor[temp + 1];
+            dstIdx += dstStridesNCH.y;
+            *static_cast<T *>(dstPtr + dstIdx) = colorsTensor[temp + 2];
+            break;
+        }
+    }
+}
+
+// -------------------- Set 1 - Kernel Executors --------------------
+template <typename T, typename U>
+RppStatus hip_exec_erase_tensor(T *srcPtr,
+                                RpptDescPtr srcDescPtr,
+                                T *dstPtr,
+                                RpptDescPtr dstDescPtr,
+                                RpptRoiLtrb *anchorBoxInfoTensor,
+                                U *colorsTensor,
+                                Rpp32u *numBoxesTensor,
+                                RpptROIPtr roiTensorPtrSrc,
+                                RpptRoiType roiType,
+                                rpp::Handle& handle)
+{
+    if (roiType == RpptRoiType::LTRB)
+        hip_exec_roi_converison_ltrb_to_xywh(roiTensorPtrSrc, handle);
+
+    int globalThreads_x = dstDescPtr->w;
+    int globalThreads_y = dstDescPtr->h;
+    int globalThreads_z = handle.GetBatchSize();
+
+    if ((srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NHWC))
+    {
+        if (srcDescPtr->dataType == RpptDataType::U8)
+        {
+            hipMemcpyAsync(dstPtr, srcPtr, static_cast<size_t>(srcDescPtr->n * srcDescPtr->strides.nStride * sizeof(Rpp8u)), hipMemcpyDeviceToDevice, handle.GetStream());
+            hipStreamSynchronize(handle.GetStream());
+            hipLaunchKernelGGL(erase_pkd_hip_tensor,
+                               dim3(ceil((float)globalThreads_x / LOCAL_THREADS_X), ceil((float)globalThreads_y / LOCAL_THREADS_Y), ceil((float)globalThreads_z / LOCAL_THREADS_Z)),
+                               dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z),
+                               0,
+                               handle.GetStream(),
+                               dstPtr,
+                               make_uint2(dstDescPtr->strides.nStride, dstDescPtr->strides.hStride),
+                               anchorBoxInfoTensor,
+                               reinterpret_cast<uchar3*>(colorsTensor),
+                               numBoxesTensor,
+                               roiTensorPtrSrc);
+        }
+        else if (srcDescPtr->dataType == RpptDataType::F16)
+        {
+            hipMemcpyAsync(dstPtr, srcPtr, static_cast<size_t>(srcDescPtr->n * srcDescPtr->strides.nStride * sizeof(Rpp16f)), hipMemcpyDeviceToDevice, handle.GetStream());
+            hipStreamSynchronize(handle.GetStream());
+            hipLaunchKernelGGL(erase_pkd_hip_tensor,
+                               dim3(ceil((float)globalThreads_x / LOCAL_THREADS_X), ceil((float)globalThreads_y / LOCAL_THREADS_Y), ceil((float)globalThreads_z / LOCAL_THREADS_Z)),
+                               dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z),
+                               0,
+                               handle.GetStream(),
+                               dstPtr,
+                               make_uint2(dstDescPtr->strides.nStride, dstDescPtr->strides.hStride),
+                               anchorBoxInfoTensor,
+                               reinterpret_cast<d_half3_s*>(colorsTensor),
+                               numBoxesTensor,
+                               roiTensorPtrSrc);
+        }
+        else if (srcDescPtr->dataType == RpptDataType::F32)
+        {
+            hipMemcpyAsync(dstPtr, srcPtr, static_cast<size_t>(srcDescPtr->n * srcDescPtr->strides.nStride * sizeof(Rpp32f)), hipMemcpyDeviceToDevice, handle.GetStream());
+            hipStreamSynchronize(handle.GetStream());
+            hipLaunchKernelGGL(erase_pkd_hip_tensor,
+                               dim3(ceil((float)globalThreads_x / LOCAL_THREADS_X), ceil((float)globalThreads_y / LOCAL_THREADS_Y), ceil((float)globalThreads_z / LOCAL_THREADS_Z)),
+                               dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z),
+                               0,
+                               handle.GetStream(),
+                               dstPtr,
+                               make_uint2(dstDescPtr->strides.nStride, dstDescPtr->strides.hStride),
+                               anchorBoxInfoTensor,
+                               reinterpret_cast<float3*>(colorsTensor),
+                               numBoxesTensor,
+                               roiTensorPtrSrc);
+        }
+        else if (srcDescPtr->dataType == RpptDataType::I8)
+        {
+            hipMemcpyAsync(dstPtr, srcPtr, static_cast<size_t>(srcDescPtr->n * srcDescPtr->strides.nStride * sizeof(Rpp8s)), hipMemcpyDeviceToDevice, handle.GetStream());
+            hipStreamSynchronize(handle.GetStream());
+            hipLaunchKernelGGL(erase_pkd_hip_tensor,
+                               dim3(ceil((float)globalThreads_x / LOCAL_THREADS_X), ceil((float)globalThreads_y / LOCAL_THREADS_Y), ceil((float)globalThreads_z / LOCAL_THREADS_Z)),
+                               dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z),
+                               0,
+                               handle.GetStream(),
+                               dstPtr,
+                               make_uint2(dstDescPtr->strides.nStride, dstDescPtr->strides.hStride),
+                               anchorBoxInfoTensor,
+                               reinterpret_cast<d_schar3_s*>(colorsTensor),
+                               numBoxesTensor,
+                               roiTensorPtrSrc);
+        }
+    }
+    else if ((srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NCHW) && dstDescPtr->c == 1)
+    {
+        hipMemcpyAsync(dstPtr, srcPtr, static_cast<size_t>(srcDescPtr->n * srcDescPtr->strides.nStride * sizeof(T)), hipMemcpyDeviceToDevice, handle.GetStream());
+        hipStreamSynchronize(handle.GetStream());
+        hipLaunchKernelGGL(erase_pln_hip_tensor,
+                           dim3(ceil((float)globalThreads_x/LOCAL_THREADS_X), ceil((float)globalThreads_y/LOCAL_THREADS_Y), ceil((float)globalThreads_z/LOCAL_THREADS_Z)),
+                           dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z),
+                           0,
+                           handle.GetStream(),
+                           dstPtr,
+                           make_uint3(dstDescPtr->strides.nStride, dstDescPtr->strides.cStride, dstDescPtr->strides.hStride),
+                           anchorBoxInfoTensor,
+                           colorsTensor,
+                           numBoxesTensor,
+                           roiTensorPtrSrc);
+    }
+    else if ((srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NCHW) && dstDescPtr->c == 3)
+    {
+        hipMemcpyAsync(dstPtr, srcPtr, static_cast<size_t>(srcDescPtr->n * srcDescPtr->strides.nStride * sizeof(T)), hipMemcpyDeviceToDevice, handle.GetStream());
+        hipStreamSynchronize(handle.GetStream());
+        hipLaunchKernelGGL(erase_pln3_hip_tensor,
+                           dim3(ceil((float)globalThreads_x/LOCAL_THREADS_X), ceil((float)globalThreads_y/LOCAL_THREADS_Y), ceil((float)globalThreads_z/LOCAL_THREADS_Z)),
+                           dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z),
+                           0,
+                           handle.GetStream(),
+                           dstPtr,
+                           make_uint3(dstDescPtr->strides.nStride, dstDescPtr->strides.cStride, dstDescPtr->strides.hStride),
+                           anchorBoxInfoTensor,
+                           colorsTensor,
+                           numBoxesTensor,
+                           roiTensorPtrSrc);
+    }
+    else if ((srcDescPtr->c == 3) && (dstDescPtr->c == 3))
+    {
+        if ((srcDescPtr->layout == RpptLayout::NHWC) && (dstDescPtr->layout == RpptLayout::NCHW))
+        {
+            globalThreads_x = (dstDescPtr->w + 7) >> 3;
+            hipLaunchKernelGGL(convert_pkd3_pln3_hip_tensor,
+                               dim3(ceil((float)globalThreads_x/LOCAL_THREADS_X), ceil((float)globalThreads_y/LOCAL_THREADS_Y), ceil((float)globalThreads_z/LOCAL_THREADS_Z)),
+                               dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z),
+                               0,
+                               handle.GetStream(),
+                               srcPtr,
+                               make_uint2(srcDescPtr->strides.nStride, srcDescPtr->strides.hStride),
+                               dstPtr,
+                               make_uint3(dstDescPtr->strides.nStride, dstDescPtr->strides.cStride, dstDescPtr->strides.hStride),
+                               roiTensorPtrSrc);
+            hipStreamSynchronize(handle.GetStream());
+            globalThreads_x = dstDescPtr->w;
+            hipLaunchKernelGGL(erase_pln3_hip_tensor,
+                               dim3(ceil((float)globalThreads_x/LOCAL_THREADS_X), ceil((float)globalThreads_y/LOCAL_THREADS_Y), ceil((float)globalThreads_z/LOCAL_THREADS_Z)),
+                               dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z),
+                               0,
+                               handle.GetStream(),
+                               dstPtr,
+                               make_uint3(dstDescPtr->strides.nStride, dstDescPtr->strides.cStride, dstDescPtr->strides.hStride),
+                               anchorBoxInfoTensor,
+                               colorsTensor,
+                               numBoxesTensor,
+                               roiTensorPtrSrc);
+        }
+        else if ((srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NHWC))
+        {
+            globalThreads_x = (dstDescPtr->w + 7) >> 3;
+            hipLaunchKernelGGL(convert_pln3_pkd3_hip_tensor,
+                               dim3(ceil((float)globalThreads_x/LOCAL_THREADS_X), ceil((float)globalThreads_y/LOCAL_THREADS_Y), ceil((float)globalThreads_z/LOCAL_THREADS_Z)),
+                               dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z),
+                               0,
+                               handle.GetStream(),
+                               srcPtr,
+                               make_uint3(srcDescPtr->strides.nStride, srcDescPtr->strides.cStride, srcDescPtr->strides.hStride),
+                               dstPtr,
+                               make_uint2(dstDescPtr->strides.nStride, dstDescPtr->strides.hStride),
+                               roiTensorPtrSrc);
+            hipStreamSynchronize(handle.GetStream());
+            globalThreads_x = dstDescPtr->w;
+            hipLaunchKernelGGL(erase_pkd_hip_tensor,
+                               dim3(ceil((float)globalThreads_x/LOCAL_THREADS_X), ceil((float)globalThreads_y/LOCAL_THREADS_Y), ceil((float)globalThreads_z/LOCAL_THREADS_Z)),
+                               dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z),
+                               0,
+                               handle.GetStream(),
+                               dstPtr,
+                               make_uint2(dstDescPtr->strides.nStride, dstDescPtr->strides.hStride),
+                               anchorBoxInfoTensor,
+                               colorsTensor,
+                               numBoxesTensor,
+                               roiTensorPtrSrc);
+        }
+    }
+
+    return RPP_SUCCESS;
+}
diff --git a/src/modules/rppt_tensor_effects_augmentations.cpp b/src/modules/rppt_tensor_effects_augmentations.cpp
index 24cc8e032..8829a4ee0 100644
--- a/src/modules/rppt_tensor_effects_augmentations.cpp
+++ b/src/modules/rppt_tensor_effects_augmentations.cpp
@@ -721,6 +721,80 @@ RppStatus rppt_vignette_host(RppPtr_t srcPtr,
 
 }
 
+/******************** erase ********************/
+
+RppStatus rppt_erase_host(RppPtr_t srcPtr,
+                          RpptDescPtr srcDescPtr,
+                          RppPtr_t dstPtr,
+                          RpptDescPtr dstDescPtr,
+                          RpptRoiLtrb *anchorBoxInfoTensor,
+                          RppPtr_t colorsTensor,
+                          Rpp32u *numBoxesTensor,
+                          RpptROIPtr roiTensorPtrSrc,
+                          RpptRoiType roiType,
+                          rppHandle_t rppHandle)
+{
+    RppLayoutParams layoutParams = get_layout_params(srcDescPtr->layout, srcDescPtr->c);
+    if ((srcDescPtr->dataType == RpptDataType::U8) && (dstDescPtr->dataType == RpptDataType::U8))
+    {
+        erase_host_tensor(static_cast<Rpp8u*>(srcPtr) + srcDescPtr->offsetInBytes,
+                          srcDescPtr,
+                          static_cast<Rpp8u*>(dstPtr) + dstDescPtr->offsetInBytes,
+                          dstDescPtr,
+                          anchorBoxInfoTensor,
+                          static_cast<Rpp8u*>(colorsTensor),
+                          numBoxesTensor,
+                          roiTensorPtrSrc,
+                          roiType,
+                          layoutParams,
+                          rpp::deref(rppHandle));
+    }
+    else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16))
+    {
+        erase_host_tensor(reinterpret_cast<Rpp16f*>(static_cast<Rpp8u*>(srcPtr) + srcDescPtr->offsetInBytes),
+                          srcDescPtr,
+                          reinterpret_cast<Rpp16f*>(static_cast<Rpp8u*>(dstPtr) + srcDescPtr->offsetInBytes),
+                          dstDescPtr,
+                          anchorBoxInfoTensor,
+                          static_cast<Rpp16f*>(colorsTensor),
+                          numBoxesTensor,
+                          roiTensorPtrSrc,
+                          roiType,
+                          layoutParams,
+                          rpp::deref(rppHandle));
+    }
+    else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32))
+    {
+        erase_host_tensor(reinterpret_cast<Rpp32f*>(static_cast<Rpp8u*>(srcPtr) + srcDescPtr->offsetInBytes),
+                          srcDescPtr,
+                          reinterpret_cast<Rpp32f*>(static_cast<Rpp8u*>(dstPtr) + dstDescPtr->offsetInBytes),
+                          dstDescPtr,
+                          anchorBoxInfoTensor,
+                          static_cast<Rpp32f*>(colorsTensor),
+                          numBoxesTensor,
+                          roiTensorPtrSrc,
+                          roiType,
+                          layoutParams,
+                          rpp::deref(rppHandle));
+    }
+    else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8))
+    {
+        erase_host_tensor(static_cast<Rpp8s*>(srcPtr) + srcDescPtr->offsetInBytes,
+                          srcDescPtr,
+                          static_cast<Rpp8s*>(dstPtr) + dstDescPtr->offsetInBytes,
+                          dstDescPtr,
+                          anchorBoxInfoTensor,
+                          static_cast<Rpp8s*>(colorsTensor),
+                          numBoxesTensor,
+                          roiTensorPtrSrc,
+                          roiType,
+                          layoutParams,
+                          rpp::deref(rppHandle));
+    }
+
+    return RPP_SUCCESS;
+}
+
 /******************** ricap ********************/
 
 RppStatus rppt_ricap_host(RppPtr_t srcPtr,
@@ -1575,4 +1649,76 @@ RppStatus rppt_vignette_gpu(RppPtr_t srcPtr,
 #endif // backend
 }
 
+RppStatus rppt_erase_gpu(RppPtr_t srcPtr,
+                         RpptDescPtr srcDescPtr,
+                         RppPtr_t dstPtr,
+                         RpptDescPtr dstDescPtr,
+                         RpptRoiLtrb *anchorBoxInfoTensor,
+                         RppPtr_t colorsTensor,
+                         Rpp32u *numBoxesTensor,
+                         RpptROIPtr roiTensorPtrSrc,
+                         RpptRoiType roiType,
+                         rppHandle_t rppHandle)
+{
+#ifdef HIP_COMPILE
+
+    if ((srcDescPtr->dataType == RpptDataType::U8) && (dstDescPtr->dataType == RpptDataType::U8))
+    {
+        hip_exec_erase_tensor(static_cast<Rpp8u *>(srcPtr) + srcDescPtr->offsetInBytes,
+                              srcDescPtr,
+                              static_cast<Rpp8u *>(dstPtr) + dstDescPtr->offsetInBytes,
+                              dstDescPtr,
+                              anchorBoxInfoTensor,
+                              static_cast<Rpp8u *>(colorsTensor),
+                              numBoxesTensor,
+                              roiTensorPtrSrc,
+                              roiType,
+                              rpp::deref(rppHandle));
+    }
+    else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16))
+    {
+        hip_exec_erase_tensor(reinterpret_cast<half*>(static_cast<Rpp8u*>(srcPtr) + srcDescPtr->offsetInBytes),
+                              srcDescPtr,
+                              reinterpret_cast<half*>(static_cast<Rpp8u*>(dstPtr) + dstDescPtr->offsetInBytes),
+                              dstDescPtr,
+                              anchorBoxInfoTensor,
+                              static_cast<half*>(colorsTensor),
+                              numBoxesTensor,
+                              roiTensorPtrSrc,
+                              roiType,
+                              rpp::deref(rppHandle));
+    }
+    else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32))
+    {
+        hip_exec_erase_tensor(reinterpret_cast<Rpp32f*>(static_cast<Rpp8u*>(srcPtr) + srcDescPtr->offsetInBytes),
+                              srcDescPtr,
+                              reinterpret_cast<Rpp32f*>(static_cast<Rpp8u*>(dstPtr) + dstDescPtr->offsetInBytes),
+                              dstDescPtr,
+                              anchorBoxInfoTensor,
+                              static_cast<Rpp32f*>(colorsTensor),
+                              numBoxesTensor,
+                              roiTensorPtrSrc,
+                              roiType,
+                              rpp::deref(rppHandle));
+    }
+    else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8))
+    {
+        hip_exec_erase_tensor(static_cast<Rpp8s*>(srcPtr) + srcDescPtr->offsetInBytes,
+                              srcDescPtr,
+                              static_cast<Rpp8s*>(dstPtr) + dstDescPtr->offsetInBytes,
+                              dstDescPtr,
+                              anchorBoxInfoTensor,
+                              static_cast<Rpp8s*>(colorsTensor),
+                              numBoxesTensor,
+                              roiTensorPtrSrc,
+                              roiType,
+                              rpp::deref(rppHandle));
+    }
+
+    return RPP_SUCCESS;
+#elif defined(OCL_COMPILE)
+    return RPP_ERROR_NOT_IMPLEMENTED;
+#endif // backend
+}
+
 #endif // GPU_SUPPORT
diff --git a/utilities/test_suite/HIP/Tensor_hip.cpp b/utilities/test_suite/HIP/Tensor_hip.cpp
index bb357c030..604231e3c 100644
--- a/utilities/test_suite/HIP/Tensor_hip.cpp
+++ b/utilities/test_suite/HIP/Tensor_hip.cpp
@@ -356,6 +356,18 @@ int main(int argc, char **argv)
     if(testCase == 82)
         CHECK_RETURN_STATUS(hipHostMalloc(&roiPtrInputCropRegion, 4 * sizeof(RpptROI)));
 
+    Rpp32u boxesInEachImage = 3;
+    Rpp32f *colorBuffer;
+    RpptRoiLtrb *anchorBoxInfoTensor;
+    Rpp32u *numOfBoxes;
+    if(testCase == 32)
+    {
+        CHECK_RETURN_STATUS(hipHostMalloc(&colorBuffer, batchSize * boxesInEachImage * sizeof(Rpp32f)));
+        CHECK_RETURN_STATUS(hipMemset(colorBuffer, 0, batchSize * boxesInEachImage * sizeof(Rpp32f)));
+        CHECK_RETURN_STATUS(hipHostMalloc(&anchorBoxInfoTensor, batchSize * boxesInEachImage * sizeof(RpptRoiLtrb)));
+        CHECK_RETURN_STATUS(hipHostMalloc(&numOfBoxes, batchSize * sizeof(Rpp32u)));
+    }
+
     // create cropRoi and patchRoi in case of crop_and_patch
     RpptROI *cropRoi, *patchRoi;
     if(testCase == 33)
@@ -736,6 +748,19 @@ int main(int argc, char **argv)
 
                     break;
                 }
+                case 32:
+                {
+                    testCaseName = "erase";
+
+                    init_erase(batchSize, boxesInEachImage, numOfBoxes, anchorBoxInfoTensor, roiTensorPtrSrc, srcDescPtr->c, colorBuffer, inputBitDepth);
+                    startWallTime = omp_get_wtime();
+                    if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5)
+                        rppt_erase_gpu(d_input, srcDescPtr, d_output, dstDescPtr, anchorBoxInfoTensor, colorBuffer, numOfBoxes, roiTensorPtrSrc, roiTypeSrc, handle);
+                    else
+                        missingFuncFlag = 1;
+
+                    break;
+                }
                 case 33:
                 {
                     testCaseName = "crop_and_patch";
@@ -1433,6 +1458,12 @@ int main(int argc, char **argv)
     }
     if (reductionTypeCase)
         CHECK_RETURN_STATUS(hipHostFree(reductionFuncResultArr));
+    if(testCase == 32)
+    {
+        CHECK_RETURN_STATUS(hipHostFree(colorBuffer));
+        CHECK_RETURN_STATUS(hipHostFree(anchorBoxInfoTensor));
+        CHECK_RETURN_STATUS(hipHostFree(numOfBoxes));
+    }
     if(anchorTensor != NULL)
         CHECK_RETURN_STATUS(hipHostFree(anchorTensor));
     if(shapeTensor != NULL)
diff --git a/utilities/test_suite/HIP/runTests.py b/utilities/test_suite/HIP/runTests.py
index d32d20e2b..f62103091 100644
--- a/utilities/test_suite/HIP/runTests.py
+++ b/utilities/test_suite/HIP/runTests.py
@@ -296,7 +296,7 @@ def rpp_test_suite_parser_and_validator():
 subprocess.run(["make", "-j16"], cwd=".")    # nosec
 
 # List of cases supported
-supportedCaseList = ['0', '1', '2', '4', '8', '13', '20', '21', '23', '29', '30', '31', '33', '34', '36', '37', '38', '39', '45', '46', '54', '61', '63', '65', '68', '70', '80', '82', '83', '84', '85', '86', '87', '88', '89', '90']
+supportedCaseList = ['0', '1', '2', '4', '8', '13', '20', '21', '23', '29', '30', '31', '32', '33', '34', '36', '37', '38', '39', '45', '46', '54', '61', '63', '65', '68', '70', '80', '82', '83', '84', '85', '86', '87', '88', '89', '90']
 
 # Create folders based on testType and profilingOption
 if testType == 1 and profilingOption == "YES":
diff --git a/utilities/test_suite/HOST/Tensor_host.cpp b/utilities/test_suite/HOST/Tensor_host.cpp
index 051d4045a..f85ea5704 100644
--- a/utilities/test_suite/HOST/Tensor_host.cpp
+++ b/utilities/test_suite/HOST/Tensor_host.cpp
@@ -740,6 +740,47 @@ int main(int argc, char **argv)
 
                     break;
                 }
+                case 32:
+                {
+                    testCaseName = "erase";
+                    Rpp32u boxesInEachImage = 3;
+                    Rpp32f colorBuffer[batchSize * boxesInEachImage];
+                    RpptRoiLtrb anchorBoxInfoTensor[batchSize * boxesInEachImage];
+                    Rpp32u numOfBoxes[batchSize];
+                    int idx;
+
+                    init_erase(batchSize, boxesInEachImage, numOfBoxes, anchorBoxInfoTensor, roiTensorPtrSrc, srcDescPtr->c, colorBuffer, inputBitDepth);
+
+                    startWallTime = omp_get_wtime();
+                    startCpuTime = clock();
+                    if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5)
+                        rppt_erase_host(input, srcDescPtr, output, dstDescPtr, anchorBoxInfoTensor, colorBuffer, numOfBoxes, roiTensorPtrSrc, roiTypeSrc, handle);
+                    else
+                        missingFuncFlag = 1;
+
+                    break;
+                }
+                case 33:
+                {
+                    testCaseName = "crop_and_patch";
+
+                    for (i = 0; i < batchSize; i++)
+                    {
+                        cropRoi[i].xywhROI.xy.x = patchRoi[i].xywhROI.xy.x = roiList[0];
+                        cropRoi[i].xywhROI.xy.y = patchRoi[i].xywhROI.xy.y = roiList[1];
+                        cropRoi[i].xywhROI.roiWidth = patchRoi[i].xywhROI.roiWidth = roiWidthList[i];
+                        cropRoi[i].xywhROI.roiHeight = patchRoi[i].xywhROI.roiHeight = roiHeightList[i];
+                    }
+
+                    startWallTime = omp_get_wtime();
+                    startCpuTime = clock();
+                    if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5)
+                        rppt_crop_and_patch_host(input, input_second, srcDescPtr, output, dstDescPtr, roiTensorPtrDst, cropRoi, patchRoi, roiTypeSrc, handle);
+                    else
+                        missingFuncFlag = 1;
+
+                    break;
+                }
                 case 34:
                 {
                     testCaseName = "lut";
@@ -803,27 +844,6 @@ int main(int argc, char **argv)
 
                     break;
                 }
-                case 33:
-                {
-                    testCaseName = "crop_and_patch";
-
-                    for (i = 0; i < batchSize; i++)
-                    {
-                        cropRoi[i].xywhROI.xy.x = patchRoi[i].xywhROI.xy.x = roiList[0];
-                        cropRoi[i].xywhROI.xy.y = patchRoi[i].xywhROI.xy.y = roiList[1];
-                        cropRoi[i].xywhROI.roiWidth = patchRoi[i].xywhROI.roiWidth = roiWidthList[i];
-                        cropRoi[i].xywhROI.roiHeight = patchRoi[i].xywhROI.roiHeight = roiHeightList[i];
-                    }
-
-                    startWallTime = omp_get_wtime();
-                    startCpuTime = clock();
-                    if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5)
-                        rppt_crop_and_patch_host(input, input_second, srcDescPtr, output, dstDescPtr, roiTensorPtrDst, cropRoi, patchRoi, roiTypeSrc, handle);
-                    else
-                        missingFuncFlag = 1;
-
-                    break;
-                }
                 case 37:
                 {
                     testCaseName = "crop";
diff --git a/utilities/test_suite/HOST/runTests.py b/utilities/test_suite/HOST/runTests.py
index 6f8eb7f70..f83f981fa 100644
--- a/utilities/test_suite/HOST/runTests.py
+++ b/utilities/test_suite/HOST/runTests.py
@@ -272,7 +272,7 @@ def rpp_test_suite_parser_and_validator():
 subprocess.run(["make", "-j16"], cwd=".")    # nosec
 
 # List of cases supported
-supportedCaseList = ['0', '1', '2', '4', '8', '13', '20', '21', '23', '29', '30', '31', '33', '34', '36', '37', '38', '39', '45', '46', '54', '61', '63', '65', '68', '70', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90']
+supportedCaseList = ['0', '1', '2', '4', '8', '13', '20', '21', '23', '29', '30', '31', '32', '33', '34', '36', '37', '38', '39', '45', '46', '54', '61', '63', '65', '68', '70', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90']
 
 print("\n\n\n\n\n")
 print("##########################################################################################")
diff --git a/utilities/test_suite/REFERENCE_OUTPUT/erase/erase_u8_Tensor.bin b/utilities/test_suite/REFERENCE_OUTPUT/erase/erase_u8_Tensor.bin
new file mode 100644
index 000000000..b3f0894ee
Binary files /dev/null and b/utilities/test_suite/REFERENCE_OUTPUT/erase/erase_u8_Tensor.bin differ
diff --git a/utilities/test_suite/rpp_test_suite_common.h b/utilities/test_suite/rpp_test_suite_common.h
index 5623c1b7b..5e1528c81 100644
--- a/utilities/test_suite/rpp_test_suite_common.h
+++ b/utilities/test_suite/rpp_test_suite_common.h
@@ -81,6 +81,7 @@ std::map<int, string> augmentationMap =
     {29, "water"},
     {30, "non_linear_blend"},
     {31, "color_cast"},
+    {32, "erase"},
     {33, "crop_and_patch"},
     {34, "lut"},
     {36, "color_twist"},
@@ -1396,3 +1397,77 @@ void init_slice(RpptGenericDescPtr descriptorPtr3D, RpptROIPtr roiPtrSrc, Rpp32u
         }
     }
 }
+
+// Erase Region initializer for unit and performance testing
+void inline init_erase(int batchSize, int boxesInEachImage, Rpp32u* numOfBoxes, RpptRoiLtrb* anchorBoxInfoTensor, RpptROIPtr roiTensorPtrSrc, int channels, Rpp32f *colorBuffer, int inputBitDepth)
+{
+    Rpp8u *colors8u = reinterpret_cast<Rpp8u *>(colorBuffer);
+    Rpp16f *colors16f = reinterpret_cast<Rpp16f *>(colorBuffer);
+    Rpp32f *colors32f = colorBuffer;
+    Rpp8s *colors8s = reinterpret_cast<Rpp8s *>(colorBuffer);
+    for(int i = 0; i < batchSize; i++)
+    {
+        numOfBoxes[i] = boxesInEachImage;
+        int idx = boxesInEachImage * i;
+
+        anchorBoxInfoTensor[idx].lt.x = 0.125 * roiTensorPtrSrc[i].xywhROI.roiWidth;
+        anchorBoxInfoTensor[idx].lt.y = 0.125 * roiTensorPtrSrc[i].xywhROI.roiHeight;
+        anchorBoxInfoTensor[idx].rb.x = 0.375 * roiTensorPtrSrc[i].xywhROI.roiWidth;
+        anchorBoxInfoTensor[idx].rb.y = 0.375 * roiTensorPtrSrc[i].xywhROI.roiHeight;
+
+        idx++;
+        anchorBoxInfoTensor[idx].lt.x = 0.125 * roiTensorPtrSrc[i].xywhROI.roiWidth;
+        anchorBoxInfoTensor[idx].lt.y = 0.625 * roiTensorPtrSrc[i].xywhROI.roiHeight;
+        anchorBoxInfoTensor[idx].rb.x = 0.875 * roiTensorPtrSrc[i].xywhROI.roiWidth;
+        anchorBoxInfoTensor[idx].rb.y = 0.875 * roiTensorPtrSrc[i].xywhROI.roiHeight;
+
+        idx++;
+        anchorBoxInfoTensor[idx].lt.x = 0.75 * roiTensorPtrSrc[i].xywhROI.roiWidth;
+        anchorBoxInfoTensor[idx].lt.y = 0.125 * roiTensorPtrSrc[i].xywhROI.roiHeight;
+        anchorBoxInfoTensor[idx].rb.x = 0.875 * roiTensorPtrSrc[i].xywhROI.roiWidth;
+        anchorBoxInfoTensor[idx].rb.y = 0.5 * roiTensorPtrSrc[i].xywhROI.roiHeight;
+
+        if(channels == 3)
+        {
+            int idx = boxesInEachImage * 3 * i;
+            colorBuffer[idx] = 0;
+            colorBuffer[idx + 1] = 0;
+            colorBuffer[idx + 2] = 240;
+            colorBuffer[idx + 3] = 0;
+            colorBuffer[idx + 4] = 240;
+            colorBuffer[idx + 5] = 0;
+            colorBuffer[idx + 6] = 240;
+            colorBuffer[idx + 7] = 0;
+            colorBuffer[idx + 8] = 0;
+            for (int j = 0; j < 9; j++)
+            {
+                if (!inputBitDepth)
+                    colors8u[idx + j] = (Rpp8u)(colorBuffer[idx + j]);
+                else if (inputBitDepth == 1)
+                    colors16f[idx + j] = (Rpp16f)(colorBuffer[idx + j] * ONE_OVER_255);
+                else if (inputBitDepth == 2)
+                    colors32f[idx + j] = (Rpp32f)(colorBuffer[idx + j] * ONE_OVER_255);
+                else if (inputBitDepth == 5)
+                    colors8s[idx + j] = (Rpp8s)(colorBuffer[idx + j] - 128);
+            }
+        }
+        else
+        {
+            int idx = boxesInEachImage * i;
+            colorBuffer[idx] = 240;
+            colorBuffer[idx + 1] = 120;
+            colorBuffer[idx + 2] = 60;
+            for (int j = 0; j < 3; j++)
+            {
+                if (!inputBitDepth)
+                    colors8u[idx + j] = (Rpp8u)(colorBuffer[idx + j]);
+                else if (inputBitDepth == 1)
+                    colors16f[idx + j] = (Rpp16f)(colorBuffer[idx + j] * ONE_OVER_255);
+                else if (inputBitDepth == 2)
+                    colors32f[idx + j] = (Rpp32f)(colorBuffer[idx + j] * ONE_OVER_255);
+                else if (inputBitDepth == 5)
+                    colors8s[idx + j] = (Rpp8s)(colorBuffer[idx + j] - 128);
+            }
+        }
+    }
+}