diff --git a/utilities/test_suite/HIP/Tensor_hip.cpp b/utilities/test_suite/HIP/Tensor_hip.cpp index 7bd46b39e..00b7d45ac 100644 --- a/utilities/test_suite/HIP/Tensor_hip.cpp +++ b/utilities/test_suite/HIP/Tensor_hip.cpp @@ -348,727 +348,727 @@ int main(int argc, char **argv) // case-wise RPP API and measure time script for Unit and Performance test printf("\nRunning %s %d times (each time with a batch size of %d images) and computing mean statistics...", func.c_str(), numRuns, batchSize); - for (int perfRunCount = 0; perfRunCount < numRuns; perfRunCount++) + for(int iterCount = 0; iterCount < noOfIterations; iterCount++) { - for(int iterCount = 0; iterCount < noOfIterations; iterCount++) - { - vector::const_iterator imagesPathStart = imageNamesPath.begin() + (iterCount * batchSize); - vector::const_iterator imagesPathEnd = imagesPathStart + batchSize; - vector::const_iterator imageNamesStart = imageNames.begin() + (iterCount * batchSize); - vector::const_iterator imageNamesEnd = imageNamesStart + batchSize; - vector::const_iterator imagesPathSecondStart = imageNamesPathSecond.begin() + (iterCount * batchSize); - vector::const_iterator imagesPathSecondEnd = imagesPathSecondStart + batchSize; - - // Set ROIs for src/dst - set_src_and_dst_roi(imagesPathStart, imagesPathEnd, roiTensorPtrSrc, roiTensorPtrDst, dstImgSizes); + vector::const_iterator imagesPathStart = imageNamesPath.begin() + (iterCount * batchSize); + vector::const_iterator imagesPathEnd = imagesPathStart + batchSize; + vector::const_iterator imageNamesStart = imageNames.begin() + (iterCount * batchSize); + vector::const_iterator imageNamesEnd = imageNamesStart + batchSize; + vector::const_iterator imagesPathSecondStart = imageNamesPathSecond.begin() + (iterCount * batchSize); + vector::const_iterator imagesPathSecondEnd = imagesPathSecondStart + batchSize; + + // Set ROIs for src/dst + set_src_and_dst_roi(imagesPathStart, imagesPathEnd, roiTensorPtrSrc, roiTensorPtrDst, dstImgSizes); + + //Read images + if(decoderType == 0) + read_image_batch_turbojpeg(inputu8, srcDescPtr, imagesPathStart); + else + read_image_batch_opencv(inputu8, srcDescPtr, imagesPathStart); + + // if the input layout requested is PLN3, convert PKD3 inputs to PLN3 for first and second input batch + if (layoutType == 1) + convert_pkd3_to_pln3(inputu8, srcDescPtr); - //Read images + if(dualInputCase) + { if(decoderType == 0) - read_image_batch_turbojpeg(inputu8, srcDescPtr, imagesPathStart); + read_image_batch_turbojpeg(inputu8Second, srcDescPtr, imagesPathSecondStart); else - read_image_batch_opencv(inputu8, srcDescPtr, imagesPathStart); - - // if the input layout requested is PLN3, convert PKD3 inputs to PLN3 for first and second input batch + read_image_batch_opencv(inputu8Second, srcDescPtr, imagesPathSecondStart); if (layoutType == 1) - convert_pkd3_to_pln3(inputu8, srcDescPtr); - - if(dualInputCase) - { - if(decoderType == 0) - read_image_batch_turbojpeg(inputu8Second, srcDescPtr, imagesPathSecondStart); - else - read_image_batch_opencv(inputu8Second, srcDescPtr, imagesPathSecondStart); - if (layoutType == 1) - convert_pkd3_to_pln3(inputu8Second, srcDescPtr); - } + convert_pkd3_to_pln3(inputu8Second, srcDescPtr); + } - // Convert inputs to correponding bit depth specified by user - convert_input_bitdepth(input, input_second, inputu8, inputu8Second, inputBitDepth, ioBufferSize, inputBufferSize, srcDescPtr, dualInputCase, conversionFactor); + // Convert inputs to correponding bit depth specified by user + convert_input_bitdepth(input, input_second, 
inputu8, inputu8Second, inputBitDepth, ioBufferSize, inputBufferSize, srcDescPtr, dualInputCase, conversionFactor); - //copy decoded inputs to hip buffers - CHECK(hipMemcpy(d_input, input, inputBufferSize, hipMemcpyHostToDevice)); - CHECK(hipMemcpy(d_output, output, outputBufferSize, hipMemcpyHostToDevice)); - if(dualInputCase) - CHECK(hipMemcpy(d_input_second, input_second, inputBufferSize, hipMemcpyHostToDevice)); + //copy decoded inputs to hip buffers + CHECK(hipMemcpy(d_input, input, inputBufferSize, hipMemcpyHostToDevice)); + CHECK(hipMemcpy(d_output, output, outputBufferSize, hipMemcpyHostToDevice)); + if(dualInputCase) + CHECK(hipMemcpy(d_input_second, input_second, inputBufferSize, hipMemcpyHostToDevice)); - int roiHeightList[batchSize], roiWidthList[batchSize]; - if(roiList[0] == 0 && roiList[1] == 0 && roiList[2] == 0 && roiList[3] == 0) + int roiHeightList[batchSize], roiWidthList[batchSize]; + if(roiList[0] == 0 && roiList[1] == 0 && roiList[2] == 0 && roiList[3] == 0) + { + for(int i = 0; i < batchSize ; i++) { - for(int i = 0; i < batchSize ; i++) - { - roiList[0] = 10; - roiList[1] = 10; - roiWidthList[i] = roiTensorPtrSrc[i].xywhROI.roiWidth / 2; - roiHeightList[i] = roiTensorPtrSrc[i].xywhROI.roiHeight / 2; - } + roiList[0] = 10; + roiList[1] = 10; + roiWidthList[i] = roiTensorPtrSrc[i].xywhROI.roiWidth / 2; + roiHeightList[i] = roiTensorPtrSrc[i].xywhROI.roiHeight / 2; } - else + } + else + { + for(int i = 0; i < batchSize ; i++) { - for(int i = 0; i < batchSize ; i++) - { - roiWidthList[i] = roiList[2]; - roiHeightList[i] = roiList[3]; - } + roiWidthList[i] = roiList[2]; + roiHeightList[i] = roiList[3]; } + } - // Uncomment to run test case with an xywhROI override - // roi.xywhROI = {0, 0, 25, 25}; - // set_roi_values(&roi, roiTensorPtrSrc, roiTypeSrc, batchSize); - // update_dst_sizes_with_roi(roiTensorPtrSrc, dstImgSizes, roiTypeSrc, batchSize); + // Uncomment to run test case with an xywhROI override + // roi.xywhROI = {0, 0, 25, 25}; + // set_roi_values(&roi, roiTensorPtrSrc, roiTypeSrc, batchSize); + // update_dst_sizes_with_roi(roiTensorPtrSrc, dstImgSizes, roiTypeSrc, batchSize); - // Uncomment to run test case with an ltrbROI override - // roiTypeSrc = RpptRoiType::LTRB; - // roi.ltrbROI = {10, 10, 40, 40}; - // set_roi_values(&roi, roiTensorPtrSrc, roiTypeSrc, batchSize); - // update_dst_sizes_with_roi(roiTensorPtrSrc, dstImgSizes, roiTypeSrc, batchSize); + // Uncomment to run test case with an ltrbROI override + // roiTypeSrc = RpptRoiType::LTRB; + // roi.ltrbROI = {10, 10, 40, 40}; + // set_roi_values(&roi, roiTensorPtrSrc, roiTypeSrc, batchSize); + // update_dst_sizes_with_roi(roiTensorPtrSrc, dstImgSizes, roiTypeSrc, batchSize); + for (int perfRunCount = 0; perfRunCount < numRuns; perfRunCount++) + { double startWallTime, endWallTime; switch (testCase) { - case 0: - { - testCaseName = "brightness"; - - Rpp32f alpha[batchSize]; - Rpp32f beta[batchSize]; - for (i = 0; i < batchSize; i++) + case 0: { - alpha[i] = 1.75; - beta[i] = 50; - } + testCaseName = "brightness"; - startWallTime = omp_get_wtime(); - if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) - rppt_brightness_gpu(d_input, srcDescPtr, d_output, dstDescPtr, alpha, beta, roiTensorPtrSrc, roiTypeSrc, handle); - else - missingFuncFlag = 1; + Rpp32f alpha[batchSize]; + Rpp32f beta[batchSize]; + for (i = 0; i < batchSize; i++) + { + alpha[i] = 1.75; + beta[i] = 50; + } - break; - } - case 1: - { - testCaseName = "gamma_correction"; + startWallTime = 
omp_get_wtime(); + if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) + rppt_brightness_gpu(d_input, srcDescPtr, d_output, dstDescPtr, alpha, beta, roiTensorPtrSrc, roiTypeSrc, handle); + else + missingFuncFlag = 1; - Rpp32f gammaVal[batchSize]; - for (i = 0; i < batchSize; i++) - gammaVal[i] = 1.9; + break; + } + case 1: + { + testCaseName = "gamma_correction"; - startWallTime = omp_get_wtime(); - if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) - rppt_gamma_correction_gpu(d_input, srcDescPtr, d_output, dstDescPtr, gammaVal, roiTensorPtrSrc, roiTypeSrc, handle); - else - missingFuncFlag = 1; + Rpp32f gammaVal[batchSize]; + for (i = 0; i < batchSize; i++) + gammaVal[i] = 1.9; - break; - } - case 2: - { - testCaseName = "blend"; + startWallTime = omp_get_wtime(); + if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) + rppt_gamma_correction_gpu(d_input, srcDescPtr, d_output, dstDescPtr, gammaVal, roiTensorPtrSrc, roiTypeSrc, handle); + else + missingFuncFlag = 1; - Rpp32f alpha[batchSize]; - for (i = 0; i < batchSize; i++) - alpha[i] = 0.4; + break; + } + case 2: + { + testCaseName = "blend"; - startWallTime = omp_get_wtime(); - if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) - rppt_blend_gpu(d_input, d_input_second, srcDescPtr, d_output, dstDescPtr, alpha, roiTensorPtrSrc, roiTypeSrc, handle); - else - missingFuncFlag = 1; + Rpp32f alpha[batchSize]; + for (i = 0; i < batchSize; i++) + alpha[i] = 0.4; - break; - } - case 4: - { - testCaseName = "contrast"; + startWallTime = omp_get_wtime(); + if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) + rppt_blend_gpu(d_input, d_input_second, srcDescPtr, d_output, dstDescPtr, alpha, roiTensorPtrSrc, roiTypeSrc, handle); + else + missingFuncFlag = 1; - Rpp32f contrastFactor[batchSize]; - Rpp32f contrastCenter[batchSize]; - for (i = 0; i < batchSize; i++) - { - contrastFactor[i] = 2.96; - contrastCenter[i] = 128; + break; } + case 4: + { + testCaseName = "contrast"; - startWallTime = omp_get_wtime(); - if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) - rppt_contrast_gpu(d_input, srcDescPtr, d_output, dstDescPtr, contrastFactor, contrastCenter, roiTensorPtrSrc, roiTypeSrc, handle); - else - missingFuncFlag = 1; + Rpp32f contrastFactor[batchSize]; + Rpp32f contrastCenter[batchSize]; + for (i = 0; i < batchSize; i++) + { + contrastFactor[i] = 2.96; + contrastCenter[i] = 128; + } - break; - } - case 13: - { - testCaseName = "exposure"; + startWallTime = omp_get_wtime(); + if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) + rppt_contrast_gpu(d_input, srcDescPtr, d_output, dstDescPtr, contrastFactor, contrastCenter, roiTensorPtrSrc, roiTypeSrc, handle); + else + missingFuncFlag = 1; - Rpp32f exposureFactor[batchSize]; - for (i = 0; i < batchSize; i++) - exposureFactor[i] = 1.4; + break; + } + case 13: + { + testCaseName = "exposure"; - startWallTime = omp_get_wtime(); - if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) - rppt_exposure_gpu(d_input, srcDescPtr, d_output, dstDescPtr, exposureFactor, roiTensorPtrSrc, roiTypeSrc, handle); - else - missingFuncFlag = 1; + Rpp32f exposureFactor[batchSize]; + for (i = 0; i < batchSize; i++) + exposureFactor[i] = 1.4; - break; - } - case 20: - { - testCaseName = "flip"; + startWallTime = omp_get_wtime(); 
+ if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) + rppt_exposure_gpu(d_input, srcDescPtr, d_output, dstDescPtr, exposureFactor, roiTensorPtrSrc, roiTypeSrc, handle); + else + missingFuncFlag = 1; - Rpp32u horizontalFlag[batchSize]; - Rpp32u verticalFlag[batchSize]; - for (i = 0; i < batchSize; i++) - { - horizontalFlag[i] = 1; - verticalFlag[i] = 0; + break; } + case 20: + { + testCaseName = "flip"; - startWallTime = omp_get_wtime(); - if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) - rppt_flip_gpu(d_input, srcDescPtr, d_output, dstDescPtr, horizontalFlag, verticalFlag, roiTensorPtrSrc, roiTypeSrc, handle); - else - missingFuncFlag = 1; + Rpp32u horizontalFlag[batchSize]; + Rpp32u verticalFlag[batchSize]; + for (i = 0; i < batchSize; i++) + { + horizontalFlag[i] = 1; + verticalFlag[i] = 0; + } - break; - } - case 21: - { - testCaseName = "resize"; + startWallTime = omp_get_wtime(); + if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) + rppt_flip_gpu(d_input, srcDescPtr, d_output, dstDescPtr, horizontalFlag, verticalFlag, roiTensorPtrSrc, roiTypeSrc, handle); + else + missingFuncFlag = 1; - for (i = 0; i < batchSize; i++) - { - dstImgSizes[i].width = roiTensorPtrDst[i].xywhROI.roiWidth = roiTensorPtrSrc[i].xywhROI.roiWidth / 2; - dstImgSizes[i].height = roiTensorPtrDst[i].xywhROI.roiHeight = roiTensorPtrSrc[i].xywhROI.roiHeight / 2; + break; } + case 21: + { + testCaseName = "resize"; - startWallTime = omp_get_wtime(); - if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) - rppt_resize_gpu(d_input, srcDescPtr, d_output, dstDescPtr, dstImgSizes, interpolationType, roiTensorPtrDst, roiTypeSrc, handle); - else - missingFuncFlag = 1; + for (i = 0; i < batchSize; i++) + { + dstImgSizes[i].width = roiTensorPtrDst[i].xywhROI.roiWidth = roiTensorPtrSrc[i].xywhROI.roiWidth / 2; + dstImgSizes[i].height = roiTensorPtrDst[i].xywhROI.roiHeight = roiTensorPtrSrc[i].xywhROI.roiHeight / 2; + } - break; - } - case 23: - { - testCaseName = "rotate"; + startWallTime = omp_get_wtime(); + if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) + rppt_resize_gpu(d_input, srcDescPtr, d_output, dstDescPtr, dstImgSizes, interpolationType, roiTensorPtrDst, roiTypeSrc, handle); + else + missingFuncFlag = 1; - if ((interpolationType != RpptInterpolationType::BILINEAR) && (interpolationType != RpptInterpolationType::NEAREST_NEIGHBOR)) - { - missingFuncFlag = 1; break; } + case 23: + { + testCaseName = "rotate"; - Rpp32f angle[batchSize]; - for (i = 0; i < batchSize; i++) - angle[i] = 50; - - startWallTime = omp_get_wtime(); - if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) - rppt_rotate_gpu(d_input, srcDescPtr, d_output, dstDescPtr, angle, interpolationType, roiTensorPtrSrc, roiTypeSrc, handle); - else - missingFuncFlag = 1; + if ((interpolationType != RpptInterpolationType::BILINEAR) && (interpolationType != RpptInterpolationType::NEAREST_NEIGHBOR)) + { + missingFuncFlag = 1; + break; + } - break; - } - case 29: - { - testCaseName = "water"; + Rpp32f angle[batchSize]; + for (i = 0; i < batchSize; i++) + angle[i] = 50; - Rpp32f amplX[batchSize]; - Rpp32f amplY[batchSize]; - Rpp32f freqX[batchSize]; - Rpp32f freqY[batchSize]; - Rpp32f phaseX[batchSize]; - Rpp32f phaseY[batchSize]; + startWallTime = omp_get_wtime(); + if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || 
inputBitDepth == 5) + rppt_rotate_gpu(d_input, srcDescPtr, d_output, dstDescPtr, angle, interpolationType, roiTensorPtrSrc, roiTypeSrc, handle); + else + missingFuncFlag = 1; - for (i = 0; i < batchSize; i++) - { - amplX[i] = 2.0f; - amplY[i] = 5.0f; - freqX[i] = 5.8f; - freqY[i] = 1.2f; - phaseX[i] = 10.0f; - phaseY[i] = 15.0f; + break; } + case 29: + { + testCaseName = "water"; - startWallTime = omp_get_wtime(); - if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) - rppt_water_gpu(d_input, srcDescPtr, d_output, dstDescPtr, amplX, amplY, freqX, freqY, phaseX, phaseY, roiTensorPtrSrc, roiTypeSrc, handle); - else - missingFuncFlag = 1; + Rpp32f amplX[batchSize]; + Rpp32f amplY[batchSize]; + Rpp32f freqX[batchSize]; + Rpp32f freqY[batchSize]; + Rpp32f phaseX[batchSize]; + Rpp32f phaseY[batchSize]; - break; - } - case 30: - { - testCaseName = "non_linear_blend"; + for (i = 0; i < batchSize; i++) + { + amplX[i] = 2.0f; + amplY[i] = 5.0f; + freqX[i] = 5.8f; + freqY[i] = 1.2f; + phaseX[i] = 10.0f; + phaseY[i] = 15.0f; + } - Rpp32f stdDev[batchSize]; - for (i = 0; i < batchSize; i++) - stdDev[i] = 50.0; + startWallTime = omp_get_wtime(); + if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) + rppt_water_gpu(d_input, srcDescPtr, d_output, dstDescPtr, amplX, amplY, freqX, freqY, phaseX, phaseY, roiTensorPtrSrc, roiTypeSrc, handle); + else + missingFuncFlag = 1; - startWallTime = omp_get_wtime(); - if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) - rppt_non_linear_blend_gpu(d_input, d_input_second, srcDescPtr, d_output, dstDescPtr, stdDev, roiTensorPtrSrc, roiTypeSrc, handle); - else - missingFuncFlag = 1; + break; + } + case 30: + { + testCaseName = "non_linear_blend"; - break; - } - case 31: - { - testCaseName = "color_cast"; + Rpp32f stdDev[batchSize]; + for (i = 0; i < batchSize; i++) + stdDev[i] = 50.0; - RpptRGB rgbTensor[batchSize]; - Rpp32f alphaTensor[batchSize]; + startWallTime = omp_get_wtime(); + if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) + rppt_non_linear_blend_gpu(d_input, d_input_second, srcDescPtr, d_output, dstDescPtr, stdDev, roiTensorPtrSrc, roiTypeSrc, handle); + else + missingFuncFlag = 1; - for (i = 0; i < batchSize; i++) - { - rgbTensor[i].R = 0; - rgbTensor[i].G = 0; - rgbTensor[i].B = 100; - alphaTensor[i] = 0.5; + break; } + case 31: + { + testCaseName = "color_cast"; - startWallTime = omp_get_wtime(); - if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) - rppt_color_cast_gpu(d_input, srcDescPtr, d_output, dstDescPtr, rgbTensor, alphaTensor, roiTensorPtrSrc, roiTypeSrc, handle); - else - missingFuncFlag = 1; - - break; - } - case 34: - { - testCaseName = "lut"; - - Rpp32f *lutBuffer; - CHECK(hipHostMalloc(&lutBuffer, 65536 * sizeof(Rpp32f))); - CHECK(hipMemset(lutBuffer, 0, 65536 * sizeof(Rpp32f))); - Rpp8u *lut8u = reinterpret_cast(lutBuffer); - Rpp16f *lut16f = reinterpret_cast(lutBuffer); - Rpp32f *lut32f = reinterpret_cast(lutBuffer); - Rpp8s *lut8s = reinterpret_cast(lutBuffer); - if (inputBitDepth == 0) - for (j = 0; j < 256; j++) - lut8u[j] = (Rpp8u)(255 - j); - else if (inputBitDepth == 3) - for (j = 0; j < 256; j++) - lut16f[j] = (Rpp16f)((255 - j) * ONE_OVER_255); - else if (inputBitDepth == 4) - for (j = 0; j < 256; j++) - lut32f[j] = (Rpp32f)((255 - j) * ONE_OVER_255); - else if (inputBitDepth == 5) - for (j = 0; j < 256; j++) - lut8s[j] = (Rpp8s)(255 - j - 
128); - - startWallTime = omp_get_wtime(); - if (inputBitDepth == 0) - rppt_lut_gpu(d_input, srcDescPtr, d_output, dstDescPtr, lut8u, roiTensorPtrSrc, roiTypeSrc, handle); - else if (inputBitDepth == 3) - rppt_lut_gpu(d_input, srcDescPtr, d_output, dstDescPtr, lut16f, roiTensorPtrSrc, roiTypeSrc, handle); - else if (inputBitDepth == 4) - rppt_lut_gpu(d_input, srcDescPtr, d_output, dstDescPtr, lut32f, roiTensorPtrSrc, roiTypeSrc, handle); - else if (inputBitDepth == 5) - rppt_lut_gpu(d_input, srcDescPtr, d_output, dstDescPtr, lut8s, roiTensorPtrSrc, roiTypeSrc, handle); - else - missingFuncFlag = 1; + RpptRGB rgbTensor[batchSize]; + Rpp32f alphaTensor[batchSize]; - break; + for (i = 0; i < batchSize; i++) + { + rgbTensor[i].R = 0; + rgbTensor[i].G = 0; + rgbTensor[i].B = 100; + alphaTensor[i] = 0.5; + } - CHECK(hipHostFree(lutBuffer)); - } - case 36: - { - testCaseName = "color_twist"; + startWallTime = omp_get_wtime(); + if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) + rppt_color_cast_gpu(d_input, srcDescPtr, d_output, dstDescPtr, rgbTensor, alphaTensor, roiTensorPtrSrc, roiTypeSrc, handle); + else + missingFuncFlag = 1; - Rpp32f brightness[batchSize]; - Rpp32f contrast[batchSize]; - Rpp32f hue[batchSize]; - Rpp32f saturation[batchSize]; - for (i = 0; i < batchSize; i++) + break; + } + case 34: { - brightness[i] = 1.4; - contrast[i] = 0.0; - hue[i] = 60.0; - saturation[i] = 1.9; + testCaseName = "lut"; + + Rpp32f *lutBuffer; + CHECK(hipHostMalloc(&lutBuffer, 65536 * sizeof(Rpp32f))); + CHECK(hipMemset(lutBuffer, 0, 65536 * sizeof(Rpp32f))); + Rpp8u *lut8u = reinterpret_cast(lutBuffer); + Rpp16f *lut16f = reinterpret_cast(lutBuffer); + Rpp32f *lut32f = reinterpret_cast(lutBuffer); + Rpp8s *lut8s = reinterpret_cast(lutBuffer); + if (inputBitDepth == 0) + for (j = 0; j < 256; j++) + lut8u[j] = (Rpp8u)(255 - j); + else if (inputBitDepth == 3) + for (j = 0; j < 256; j++) + lut16f[j] = (Rpp16f)((255 - j) * ONE_OVER_255); + else if (inputBitDepth == 4) + for (j = 0; j < 256; j++) + lut32f[j] = (Rpp32f)((255 - j) * ONE_OVER_255); + else if (inputBitDepth == 5) + for (j = 0; j < 256; j++) + lut8s[j] = (Rpp8s)(255 - j - 128); + + startWallTime = omp_get_wtime(); + if (inputBitDepth == 0) + rppt_lut_gpu(d_input, srcDescPtr, d_output, dstDescPtr, lut8u, roiTensorPtrSrc, roiTypeSrc, handle); + else if (inputBitDepth == 3) + rppt_lut_gpu(d_input, srcDescPtr, d_output, dstDescPtr, lut16f, roiTensorPtrSrc, roiTypeSrc, handle); + else if (inputBitDepth == 4) + rppt_lut_gpu(d_input, srcDescPtr, d_output, dstDescPtr, lut32f, roiTensorPtrSrc, roiTypeSrc, handle); + else if (inputBitDepth == 5) + rppt_lut_gpu(d_input, srcDescPtr, d_output, dstDescPtr, lut8s, roiTensorPtrSrc, roiTypeSrc, handle); + else + missingFuncFlag = 1; + + break; + + CHECK(hipHostFree(lutBuffer)); } + case 36: + { + testCaseName = "color_twist"; - startWallTime = omp_get_wtime(); - if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) - rppt_color_twist_gpu(d_input, srcDescPtr, d_output, dstDescPtr, brightness, contrast, hue, saturation, roiTensorPtrSrc, roiTypeSrc, handle); - else - missingFuncFlag = 1; + Rpp32f brightness[batchSize]; + Rpp32f contrast[batchSize]; + Rpp32f hue[batchSize]; + Rpp32f saturation[batchSize]; + for (i = 0; i < batchSize; i++) + { + brightness[i] = 1.4; + contrast[i] = 0.0; + hue[i] = 60.0; + saturation[i] = 1.9; + } - break; - } - case 37: - { - testCaseName = "crop"; + startWallTime = omp_get_wtime(); + if (inputBitDepth == 0 || 
inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) + rppt_color_twist_gpu(d_input, srcDescPtr, d_output, dstDescPtr, brightness, contrast, hue, saturation, roiTensorPtrSrc, roiTypeSrc, handle); + else + missingFuncFlag = 1; - for (i = 0; i < batchSize; i++) - { - roiTensorPtrDst[i].xywhROI.xy.x = roiList[0]; - roiTensorPtrDst[i].xywhROI.xy.y = roiList[1]; - dstImgSizes[i].width = roiTensorPtrDst[i].xywhROI.roiWidth = roiWidthList[i]; - dstImgSizes[i].height = roiTensorPtrDst[i].xywhROI.roiHeight = roiHeightList[i]; + break; } + case 37: + { + testCaseName = "crop"; - startWallTime = omp_get_wtime(); - if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) - rppt_crop_gpu(d_input, srcDescPtr, d_output, dstDescPtr, roiTensorPtrDst, roiTypeSrc, handle); - else - missingFuncFlag = 1; + for (i = 0; i < batchSize; i++) + { + roiTensorPtrDst[i].xywhROI.xy.x = roiList[0]; + roiTensorPtrDst[i].xywhROI.xy.y = roiList[1]; + dstImgSizes[i].width = roiTensorPtrDst[i].xywhROI.roiWidth = roiWidthList[i]; + dstImgSizes[i].height = roiTensorPtrDst[i].xywhROI.roiHeight = roiHeightList[i]; + } - break; - } - case 38: - { - testCaseName = "crop_mirror_normalize"; - Rpp32f multiplier[batchSize * srcDescPtr->c]; - Rpp32f offset[batchSize * srcDescPtr->c]; - Rpp32u mirror[batchSize]; - if (srcDescPtr->c == 3) + startWallTime = omp_get_wtime(); + if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) + rppt_crop_gpu(d_input, srcDescPtr, d_output, dstDescPtr, roiTensorPtrDst, roiTypeSrc, handle); + else + missingFuncFlag = 1; + + break; + } + case 38: { - Rpp32f meanParam[3] = { 60.0f, 80.0f, 100.0f }; - Rpp32f stdDevParam[3] = { 0.9f, 0.9f, 0.9f }; - Rpp32f offsetParam[3] = { - meanParam[0] / stdDevParam[0], - meanParam[1] / stdDevParam[1], - meanParam[2] / stdDevParam[2] }; - Rpp32f multiplierParam[3] = { 1.0f / stdDevParam[0], 1.0f / stdDevParam[1], 1.0f / stdDevParam[2] }; + testCaseName = "crop_mirror_normalize"; + Rpp32f multiplier[batchSize * srcDescPtr->c]; + Rpp32f offset[batchSize * srcDescPtr->c]; + Rpp32u mirror[batchSize]; + if (srcDescPtr->c == 3) + { + Rpp32f meanParam[3] = { 60.0f, 80.0f, 100.0f }; + Rpp32f stdDevParam[3] = { 0.9f, 0.9f, 0.9f }; + Rpp32f offsetParam[3] = { - meanParam[0] / stdDevParam[0], - meanParam[1] / stdDevParam[1], - meanParam[2] / stdDevParam[2] }; + Rpp32f multiplierParam[3] = { 1.0f / stdDevParam[0], 1.0f / stdDevParam[1], 1.0f / stdDevParam[2] }; + + for (i = 0, j = 0; i < batchSize; i++, j += 3) + { + multiplier[j] = multiplierParam[0]; + offset[j] = offsetParam[0]; + multiplier[j + 1] = multiplierParam[1]; + offset[j + 1] = offsetParam[1]; + multiplier[j + 2] = multiplierParam[2]; + offset[j + 2] = offsetParam[2]; + mirror[i] = 1; + } + } + else if(srcDescPtr->c == 1) + { + Rpp32f meanParam = 100.0f; + Rpp32f stdDevParam = 0.9f; + Rpp32f offsetParam = - meanParam / stdDevParam; + Rpp32f multiplierParam = 1.0f / stdDevParam; + + for (i = 0; i < batchSize; i++) + { + multiplier[i] = multiplierParam; + offset[i] = offsetParam; + mirror[i] = 1; + } + } - for (i = 0, j = 0; i < batchSize; i++, j += 3) + for (i = 0; i < batchSize; i++) { - multiplier[j] = multiplierParam[0]; - offset[j] = offsetParam[0]; - multiplier[j + 1] = multiplierParam[1]; - offset[j + 1] = offsetParam[1]; - multiplier[j + 2] = multiplierParam[2]; - offset[j + 2] = offsetParam[2]; - mirror[i] = 1; + roiTensorPtrDst[i].xywhROI.xy.x = roiList[0]; + roiTensorPtrDst[i].xywhROI.xy.y = roiList[1]; + dstImgSizes[i].width = 
roiTensorPtrDst[i].xywhROI.roiWidth = roiWidthList[i]; + dstImgSizes[i].height = roiTensorPtrDst[i].xywhROI.roiHeight = roiHeightList[i]; } + + startWallTime = omp_get_wtime(); + if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 3 || inputBitDepth == 4 || inputBitDepth == 5) + rppt_crop_mirror_normalize_gpu(d_input, srcDescPtr, d_output, dstDescPtr, offset, multiplier, mirror, roiTensorPtrDst, roiTypeSrc, handle); + else + missingFuncFlag = 1; + + break; } - else if(srcDescPtr->c == 1) + case 39: { - Rpp32f meanParam = 100.0f; - Rpp32f stdDevParam = 0.9f; - Rpp32f offsetParam = - meanParam / stdDevParam; - Rpp32f multiplierParam = 1.0f / stdDevParam; + testCaseName = "resize_crop_mirror"; - for (i = 0; i < batchSize; i++) + if (interpolationType != RpptInterpolationType::BILINEAR) { - multiplier[i] = multiplierParam; - offset[i] = offsetParam; + missingFuncFlag = 1; + break; + } + + Rpp32u mirror[batchSize]; + for (i = 0; i < batchSize; i++) mirror[i] = 1; + + for (i = 0; i < batchSize; i++) + { + roiTensorPtrSrc[i].xywhROI.xy.x = 10; + roiTensorPtrSrc[i].xywhROI.xy.y = 10; + dstImgSizes[i].width = roiTensorPtrSrc[i].xywhROI.roiWidth / 2; + dstImgSizes[i].height = roiTensorPtrSrc[i].xywhROI.roiHeight / 2; + roiTensorPtrDst[i].xywhROI.roiWidth = 50; + roiTensorPtrDst[i].xywhROI.roiHeight = 50; } - } - for (i = 0; i < batchSize; i++) - { - roiTensorPtrDst[i].xywhROI.xy.x = roiList[0]; - roiTensorPtrDst[i].xywhROI.xy.y = roiList[1]; - dstImgSizes[i].width = roiTensorPtrDst[i].xywhROI.roiWidth = roiWidthList[i]; - dstImgSizes[i].height = roiTensorPtrDst[i].xywhROI.roiHeight = roiHeightList[i]; + startWallTime = omp_get_wtime(); + if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 3 || inputBitDepth == 4 || inputBitDepth == 5) + rppt_resize_crop_mirror_gpu(d_input, srcDescPtr, d_output, dstDescPtr, dstImgSizes, interpolationType, mirror, roiTensorPtrDst, roiTypeSrc, handle); + else + missingFuncFlag = 1; + + break; } + case 45: + { + testCaseName = "color_temperature"; - startWallTime = omp_get_wtime(); - if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 3 || inputBitDepth == 4 || inputBitDepth == 5) - rppt_crop_mirror_normalize_gpu(d_input, srcDescPtr, d_output, dstDescPtr, offset, multiplier, mirror, roiTensorPtrDst, roiTypeSrc, handle); - else - missingFuncFlag = 1; + Rpp32s adjustment[batchSize]; + for (i = 0; i < batchSize; i++) + adjustment[i] = 70; - break; - } - case 39: - { - testCaseName = "resize_crop_mirror"; + startWallTime = omp_get_wtime(); + if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) + rppt_color_temperature_gpu(d_input, srcDescPtr, d_output, dstDescPtr, adjustment, roiTensorPtrSrc, roiTypeSrc, handle); + else + missingFuncFlag = 1; - if (interpolationType != RpptInterpolationType::BILINEAR) - { - missingFuncFlag = 1; break; } + case 49: + { + testCaseName = "box_filter"; + Rpp32u kernelSize = additionalParam; - Rpp32u mirror[batchSize]; - for (i = 0; i < batchSize; i++) - mirror[i] = 1; + startWallTime = omp_get_wtime(); + if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) + rppt_box_filter_gpu(d_input, srcDescPtr, d_output, dstDescPtr, kernelSize, roiTensorPtrSrc, roiTypeSrc, handle); + else + missingFuncFlag = 1; - for (i = 0; i < batchSize; i++) - { - roiTensorPtrSrc[i].xywhROI.xy.x = 10; - roiTensorPtrSrc[i].xywhROI.xy.y = 10; - dstImgSizes[i].width = 
roiTensorPtrSrc[i].xywhROI.roiWidth / 2; - dstImgSizes[i].height = roiTensorPtrSrc[i].xywhROI.roiHeight / 2; - roiTensorPtrDst[i].xywhROI.roiWidth = 50; - roiTensorPtrDst[i].xywhROI.roiHeight = 50; + break; } + case 54: + { + testCaseName = "gaussian_filter"; + Rpp32u kernelSize = additionalParam; - startWallTime = omp_get_wtime(); - if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 3 || inputBitDepth == 4 || inputBitDepth == 5) - rppt_resize_crop_mirror_gpu(d_input, srcDescPtr, d_output, dstDescPtr, dstImgSizes, interpolationType, mirror, roiTensorPtrDst, roiTypeSrc, handle); - else - missingFuncFlag = 1; - - break; - } - case 45: - { - testCaseName = "color_temperature"; + Rpp32f stdDevTensor[batchSize]; + for (i = 0; i < batchSize; i++) + { + stdDevTensor[i] = 5.0f; + } - Rpp32s adjustment[batchSize]; - for (i = 0; i < batchSize; i++) - adjustment[i] = 70; + startWallTime = omp_get_wtime(); + if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) + rppt_gaussian_filter_gpu(d_input, srcDescPtr, d_output, dstDescPtr, stdDevTensor, kernelSize, roiTensorPtrSrc, roiTypeSrc, handle); + else + missingFuncFlag = 1; - startWallTime = omp_get_wtime(); - if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) - rppt_color_temperature_gpu(d_input, srcDescPtr, d_output, dstDescPtr, adjustment, roiTensorPtrSrc, roiTypeSrc, handle); - else - missingFuncFlag = 1; + break; + } + case 61: + { + testCaseName = "magnitude"; - break; - } - case 49: - { - testCaseName = "box_filter"; - Rpp32u kernelSize = additionalParam; + startWallTime = omp_get_wtime(); + if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) + rppt_magnitude_gpu(d_input, d_input_second, srcDescPtr, d_output, dstDescPtr, roiTensorPtrSrc, roiTypeSrc, handle); + else + missingFuncFlag = 1; - startWallTime = omp_get_wtime(); - if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) - rppt_box_filter_gpu(d_input, srcDescPtr, d_output, dstDescPtr, kernelSize, roiTensorPtrSrc, roiTypeSrc, handle); - else - missingFuncFlag = 1; + break; + } + case 63: + { + testCaseName = "phase"; - break; - } - case 54: - { - testCaseName = "gaussian_filter"; - Rpp32u kernelSize = additionalParam; + startWallTime = omp_get_wtime(); + if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) + rppt_phase_gpu(d_input, d_input_second, srcDescPtr, d_output, dstDescPtr, roiTensorPtrSrc, roiTypeSrc, handle); + else + missingFuncFlag = 1; - Rpp32f stdDevTensor[batchSize]; - for (i = 0; i < batchSize; i++) - { - stdDevTensor[i] = 5.0f; + break; } + case 70: + { + testCaseName = "copy"; - startWallTime = omp_get_wtime(); - if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) - rppt_gaussian_filter_gpu(d_input, srcDescPtr, d_output, dstDescPtr, stdDevTensor, kernelSize, roiTensorPtrSrc, roiTypeSrc, handle); - else - missingFuncFlag = 1; + startWallTime = omp_get_wtime(); + if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) + rppt_copy_gpu(d_input, srcDescPtr, d_output, dstDescPtr, handle); + else + missingFuncFlag = 1; - break; - } - case 61: - { - testCaseName = "magnitude"; + break; + } + case 80: + { + testCaseName = "resize_mirror_normalize"; - startWallTime = omp_get_wtime(); - if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) - 
rppt_magnitude_gpu(d_input, d_input_second, srcDescPtr, d_output, dstDescPtr, roiTensorPtrSrc, roiTypeSrc, handle); - else - missingFuncFlag = 1; + if (interpolationType != RpptInterpolationType::BILINEAR) + { + missingFuncFlag = 1; + break; + } - break; - } - case 63: - { - testCaseName = "phase"; + for (i = 0; i < batchSize; i++) + { + dstImgSizes[i].width = roiTensorPtrDst[i].xywhROI.roiWidth = roiTensorPtrSrc[i].xywhROI.roiWidth / 2; + dstImgSizes[i].height = roiTensorPtrDst[i].xywhROI.roiHeight = roiTensorPtrSrc[i].xywhROI.roiWidth / 2; + } - startWallTime = omp_get_wtime(); - if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) - rppt_phase_gpu(d_input, d_input_second, srcDescPtr, d_output, dstDescPtr, roiTensorPtrSrc, roiTypeSrc, handle); - else - missingFuncFlag = 1; + Rpp32f mean[batchSize * 3]; + Rpp32f stdDev[batchSize * 3]; + Rpp32u mirror[batchSize]; + for (i = 0, j = 0; i < batchSize; i++, j += 3) + { + mean[j] = 60.0; + stdDev[j] = 1.0; - break; - } - case 70: - { - testCaseName = "copy"; + mean[j + 1] = 80.0; + stdDev[j + 1] = 1.0; - startWallTime = omp_get_wtime(); - if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) - rppt_copy_gpu(d_input, srcDescPtr, d_output, dstDescPtr, handle); - else - missingFuncFlag = 1; + mean[j + 2] = 100.0; + stdDev[j + 2] = 1.0; + mirror[i] = 1; + } - break; - } - case 80: - { - testCaseName = "resize_mirror_normalize"; + startWallTime = omp_get_wtime(); + if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) + rppt_resize_mirror_normalize_gpu(d_input, srcDescPtr, d_output, dstDescPtr, dstImgSizes, interpolationType, mean, stdDev, mirror, roiTensorPtrDst, roiTypeSrc, handle); + else + missingFuncFlag = 1; - if (interpolationType != RpptInterpolationType::BILINEAR) - { - missingFuncFlag = 1; break; } - - for (i = 0; i < batchSize; i++) + case 82: { - dstImgSizes[i].width = roiTensorPtrDst[i].xywhROI.roiWidth = roiTensorPtrSrc[i].xywhROI.roiWidth / 2; - dstImgSizes[i].height = roiTensorPtrDst[i].xywhROI.roiHeight = roiTensorPtrSrc[i].xywhROI.roiWidth / 2; + testCaseName = "ricap"; + + Rpp32u permutationTensor[batchSize * 4]; + if(qaFlag) + init_ricap_qa(maxWidth, maxHeight, batchSize, permutationTensor, roiPtrInputCropRegion); + else + init_ricap(maxWidth, maxHeight, batchSize, permutationTensor, roiPtrInputCropRegion); + + startWallTime = omp_get_wtime(); + if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) + rppt_ricap_gpu(d_input, srcDescPtr, d_output, dstDescPtr, permutationTensor, roiPtrInputCropRegion, roiTypeSrc, handle); + else + missingFuncFlag = 1; + break; } - - Rpp32f mean[batchSize * 3]; - Rpp32f stdDev[batchSize * 3]; - Rpp32u mirror[batchSize]; - for (i = 0, j = 0; i < batchSize; i++, j += 3) + case 83: { - mean[j] = 60.0; - stdDev[j] = 1.0; + testCaseName = "gridmask"; - mean[j + 1] = 80.0; - stdDev[j + 1] = 1.0; + Rpp32u tileWidth = 40; + Rpp32f gridRatio = 0.6; + Rpp32f gridAngle = 0.5; + RpptUintVector2D translateVector; + translateVector.x = 0.0; + translateVector.y = 0.0; - mean[j + 2] = 100.0; - stdDev[j + 2] = 1.0; - mirror[i] = 1; - } + startWallTime = omp_get_wtime(); + if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) + rppt_gridmask_gpu(d_input, srcDescPtr, d_output, dstDescPtr, tileWidth, gridRatio, gridAngle, translateVector, roiTensorPtrSrc, roiTypeSrc, handle); + else + missingFuncFlag = 1; - startWallTime = omp_get_wtime(); - 
if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) - rppt_resize_mirror_normalize_gpu(d_input, srcDescPtr, d_output, dstDescPtr, dstImgSizes, interpolationType, mean, stdDev, mirror, roiTensorPtrDst, roiTypeSrc, handle); - else - missingFuncFlag = 1; + break; + } + case 84: + { + testCaseName = "spatter"; - break; - } - case 82: - { - testCaseName = "ricap"; - - Rpp32u permutationTensor[batchSize * 4]; - if(qaFlag) - init_ricap_qa(maxWidth, maxHeight, batchSize, permutationTensor, roiPtrInputCropRegion); - else - init_ricap(maxWidth, maxHeight, batchSize, permutationTensor, roiPtrInputCropRegion); - - startWallTime = omp_get_wtime(); - if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) - rppt_ricap_gpu(d_input, srcDescPtr, d_output, dstDescPtr, permutationTensor, roiPtrInputCropRegion, roiTypeSrc, handle); - else - missingFuncFlag = 1; - break; - } - case 83: - { - testCaseName = "gridmask"; - - Rpp32u tileWidth = 40; - Rpp32f gridRatio = 0.6; - Rpp32f gridAngle = 0.5; - RpptUintVector2D translateVector; - translateVector.x = 0.0; - translateVector.y = 0.0; - - startWallTime = omp_get_wtime(); - if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) - rppt_gridmask_gpu(d_input, srcDescPtr, d_output, dstDescPtr, tileWidth, gridRatio, gridAngle, translateVector, roiTensorPtrSrc, roiTypeSrc, handle); - else - missingFuncFlag = 1; + RpptRGB spatterColor; - break; - } - case 84: - { - testCaseName = "spatter"; + // Mud Spatter + spatterColor.R = 65; + spatterColor.G = 50; + spatterColor.B = 23; - RpptRGB spatterColor; + // Blood Spatter + // spatterColor.R = 98; - // Mud Spatter - spatterColor.R = 65; - spatterColor.G = 50; - spatterColor.B = 23; + // Ink Spatter + // spatterColor.R = 5; + // spatterColor.G = 20; + // spatterColor.B = 64; - // Blood Spatter - // spatterColor.R = 98; + startWallTime = omp_get_wtime(); + if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) + rppt_spatter_gpu(d_input, srcDescPtr, d_output, dstDescPtr, spatterColor, roiTensorPtrSrc, roiTypeSrc, handle); + else + missingFuncFlag = 1; - // Ink Spatter - // spatterColor.R = 5; - // spatterColor.G = 20; - // spatterColor.B = 64; + break; + } + case 85: + { + testCaseName = "swap_channels"; - startWallTime = omp_get_wtime(); - if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) - rppt_spatter_gpu(d_input, srcDescPtr, d_output, dstDescPtr, spatterColor, roiTensorPtrSrc, roiTypeSrc, handle); - else - missingFuncFlag = 1; + startWallTime = omp_get_wtime(); + if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) + rppt_swap_channels_gpu(d_input, srcDescPtr, d_output, dstDescPtr, handle); + else + missingFuncFlag = 1; - break; - } - case 85: - { - testCaseName = "swap_channels"; - - startWallTime = omp_get_wtime(); - if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) - rppt_swap_channels_gpu(d_input, srcDescPtr, d_output, dstDescPtr, handle); - else - missingFuncFlag = 1; + break; + } + case 86: + { + testCaseName = "color_to_greyscale"; - break; - } - case 86: - { - testCaseName = "color_to_greyscale"; + RpptSubpixelLayout srcSubpixelLayout = RpptSubpixelLayout::RGBtype; - RpptSubpixelLayout srcSubpixelLayout = RpptSubpixelLayout::RGBtype; + startWallTime = omp_get_wtime(); + if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) + 
rppt_color_to_greyscale_gpu(d_input, srcDescPtr, d_output, dstDescPtr, srcSubpixelLayout, handle); + else + missingFuncFlag = 1; - startWallTime = omp_get_wtime(); - if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) - rppt_color_to_greyscale_gpu(d_input, srcDescPtr, d_output, dstDescPtr, srcSubpixelLayout, handle); - else - missingFuncFlag = 1; + break; + } + case 87: + { + testCaseName = "tensor_sum"; - break; - } - case 87: - { - testCaseName = "tensor_sum"; + if(srcDescPtr->c == 1) + reductionFuncResultArrLength = srcDescPtr->n; - if(srcDescPtr->c == 1) - reductionFuncResultArrLength = srcDescPtr->n; + startWallTime = omp_get_wtime(); - startWallTime = omp_get_wtime(); + if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) + rppt_tensor_sum_gpu(d_input, srcDescPtr, reductionFuncResultArr, reductionFuncResultArrLength, roiTensorPtrSrc, roiTypeSrc, handle); + else + missingFuncFlag = 1; - if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) - rppt_tensor_sum_gpu(d_input, srcDescPtr, reductionFuncResultArr, reductionFuncResultArrLength, roiTensorPtrSrc, roiTypeSrc, handle); - else - missingFuncFlag = 1; + break; + } + case 88: + { + testCaseName = "tensor_min"; - break; - } - case 88: - { - testCaseName = "tensor_min"; + startWallTime = omp_get_wtime(); + if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) + rppt_tensor_min_gpu(d_input, srcDescPtr, reductionFuncResultArr, reductionFuncResultArrLength, roiTensorPtrSrc, roiTypeSrc, handle); + else + missingFuncFlag = 1; - startWallTime = omp_get_wtime(); - if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) - rppt_tensor_min_gpu(d_input, srcDescPtr, reductionFuncResultArr, reductionFuncResultArrLength, roiTensorPtrSrc, roiTypeSrc, handle); - else - missingFuncFlag = 1; + break; + } + case 89: + { + testCaseName = "tensor_max"; - break; - } - case 89: - { - testCaseName = "tensor_max"; + startWallTime = omp_get_wtime(); + if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) + rppt_tensor_max_gpu(d_input, srcDescPtr, reductionFuncResultArr, reductionFuncResultArrLength, roiTensorPtrSrc, roiTypeSrc, handle); + else + missingFuncFlag = 1; - startWallTime = omp_get_wtime(); - if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) - rppt_tensor_max_gpu(d_input, srcDescPtr, reductionFuncResultArr, reductionFuncResultArrLength, roiTensorPtrSrc, roiTypeSrc, handle); - else + break; + } + default: missingFuncFlag = 1; - - break; - } - default: - missingFuncFlag = 1; - break; - } + break; + } CHECK(hipDeviceSynchronize()); endWallTime = omp_get_wtime(); @@ -1081,117 +1081,118 @@ int main(int argc, char **argv) maxWallTime = max(maxWallTime, wallTime); minWallTime = min(minWallTime, wallTime); - avgWallTime += wallTime ; - wallTime *= 1000; - if (testType == 0) + avgWallTime += wallTime; + } + wallTime *= 1000; + + if (testType == 0) + { + cout << "\n\nGPU Backend Wall Time: " << wallTime <<" ms/batch"<< endl; + // Display results for reduction functions + if (reductionTypeCase) { - cout << "\n\nGPU Backend Wall Time: " << wallTime <<" ms/batch"<< endl; - // Display results for reduction functions - if (reductionTypeCase) + if(srcDescPtr->c == 3) + printf("\nReduction result (Batch of 3 channel images produces 4 results per image in batch): "); + else if(srcDescPtr->c == 1) { - if(srcDescPtr->c 
== 3) - printf("\nReduction result (Batch of 3 channel images produces 4 results per image in batch): "); - else if(srcDescPtr->c == 1) - { - printf("\nReduction result (Batch of 1 channel images produces 1 result per image in batch): "); - reductionFuncResultArrLength = srcDescPtr->n; - } - - // print reduction functions output array based on different bit depths, and precision desired - int precision = ((dstDescPtr->dataType == RpptDataType::F32) || (dstDescPtr->dataType == RpptDataType::F16)) ? 3 : 0; - if (dstDescPtr->dataType == RpptDataType::U8) - { - if (testCase == 87) - print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); - else - print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); - } - else if (dstDescPtr->dataType == RpptDataType::F16) - { - if (testCase == 87) - print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); - else - print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); - } - else if (dstDescPtr->dataType == RpptDataType::F32) - { - if (testCase == 87) - print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); - else - print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); - } - else if (dstDescPtr->dataType == RpptDataType::I8) - { - if (testCase == 87) - print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); - else - print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); - } - printf("\n"); + printf("\nReduction result (Batch of 1 channel images produces 1 result per image in batch): "); + reductionFuncResultArrLength = srcDescPtr->n; + } - /*Compare the output of the function with golden outputs only if - 1.QA Flag is set - 2.input bit depth 0 (U8) - 3.source and destination layout are the same*/ - if(qaFlag && inputBitDepth == 0 && (srcDescPtr->layout == dstDescPtr->layout) && !(randomOutputCase)) - { - if (testCase == 87) - compare_reduction_output(static_cast(reductionFuncResultArr), testCaseName, srcDescPtr, testCase, dst, scriptPath); - else - compare_reduction_output(static_cast(reductionFuncResultArr), testCaseName, srcDescPtr, testCase, dst, scriptPath); - } + // print reduction functions output array based on different bit depths, and precision desired + int precision = ((dstDescPtr->dataType == RpptDataType::F32) || (dstDescPtr->dataType == RpptDataType::F16)) ? 
3 : 0; + if (dstDescPtr->dataType == RpptDataType::U8) + { + if (testCase == 87) + print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); + else + print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); + } + else if (dstDescPtr->dataType == RpptDataType::F16) + { + if (testCase == 87) + print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); + else + print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); } - else + else if (dstDescPtr->dataType == RpptDataType::F32) { - CHECK(hipMemcpy(output, d_output, outputBufferSize, hipMemcpyDeviceToHost)); + if (testCase == 87) + print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); + else + print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); + } + else if (dstDescPtr->dataType == RpptDataType::I8) + { + if (testCase == 87) + print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); + else + print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); + } + printf("\n"); - // Reconvert other bit depths to 8u for output display purposes - convert_output_bitdepth_to_u8(output, outputu8, inputBitDepth, oBufferSize, outputBufferSize, dstDescPtr, invConversionFactor); + /*Compare the output of the function with golden outputs only if + 1.QA Flag is set + 2.input bit depth 0 (U8) + 3.source and destination layout are the same*/ + if(qaFlag && inputBitDepth == 0 && (srcDescPtr->layout == dstDescPtr->layout) && !(randomOutputCase)) + { + if (testCase == 87) + compare_reduction_output(static_cast(reductionFuncResultArr), testCaseName, srcDescPtr, testCase, dst, scriptPath); + else + compare_reduction_output(static_cast(reductionFuncResultArr), testCaseName, srcDescPtr, testCase, dst, scriptPath); + } + } + else + { + CHECK(hipMemcpy(output, d_output, outputBufferSize, hipMemcpyDeviceToHost)); - // if DEBUG_MODE is set to 1, the output of the first iteration will be dumped to csv files for debugging purposes. 
- if(DEBUG_MODE && iterCount == 0) - { - std::ofstream refFile; - refFile.open(func + ".csv"); - for (int i = 0; i < oBufferSize; i++) - refFile << static_cast(*(outputu8 + i)) << ","; - refFile.close(); - } + // Reconvert other bit depths to 8u for output display purposes + convert_output_bitdepth_to_u8(output, outputu8, inputBitDepth, oBufferSize, outputBufferSize, dstDescPtr, invConversionFactor); - /*Compare the output of the function with golden outputs only if - 1.QA Flag is set - 2.input bit depth 0 (Input U8 && Output U8) - 3.source and destination layout are the same - 4.augmentation case does not generate random output*/ - if(qaFlag && inputBitDepth == 0 && ((srcDescPtr->layout == dstDescPtr->layout) || pln1OutTypeCase) && !(randomOutputCase)) - compare_output(outputu8, testCaseName, srcDescPtr, dstDescPtr, dstImgSizes, batchSize, interpolationTypeName, noiseTypeName, testCase, dst, scriptPath); - - // Calculate exact dstROI in XYWH format for OpenCV dump - if (roiTypeSrc == RpptRoiType::LTRB) - convert_roi(roiTensorPtrDst, RpptRoiType::XYWH, dstDescPtr->n); - - // Check if the ROI values for each input is within the bounds of the max buffer allocated - RpptROI roiDefault; - RpptROIPtr roiPtrDefault = &roiDefault; - roiPtrDefault->xywhROI = {0, 0, static_cast(dstDescPtr->w), static_cast(dstDescPtr->h)}; - for (int i = 0; i < dstDescPtr->n; i++) - { - roiTensorPtrDst[i].xywhROI.roiWidth = std::min(roiPtrDefault->xywhROI.roiWidth - roiTensorPtrDst[i].xywhROI.xy.x, roiTensorPtrDst[i].xywhROI.roiWidth); - roiTensorPtrDst[i].xywhROI.roiHeight = std::min(roiPtrDefault->xywhROI.roiHeight - roiTensorPtrDst[i].xywhROI.xy.y, roiTensorPtrDst[i].xywhROI.roiHeight); - roiTensorPtrDst[i].xywhROI.xy.x = std::max(roiPtrDefault->xywhROI.xy.x, roiTensorPtrDst[i].xywhROI.xy.x); - roiTensorPtrDst[i].xywhROI.xy.y = std::max(roiPtrDefault->xywhROI.xy.y, roiTensorPtrDst[i].xywhROI.xy.y); - } + // if DEBUG_MODE is set to 1, the output of the first iteration will be dumped to csv files for debugging purposes. 
+ if(DEBUG_MODE && iterCount == 0) + { + std::ofstream refFile; + refFile.open(func + ".csv"); + for (int i = 0; i < oBufferSize; i++) + refFile << static_cast(*(outputu8 + i)) << ","; + refFile.close(); + } - // Convert any PLN3 outputs to the corresponding PKD3 version for OpenCV dump - if (layoutType == 0 || layoutType == 1) - { - if ((dstDescPtr->c == 3) && (dstDescPtr->layout == RpptLayout::NCHW)) - convert_pln3_to_pkd3(outputu8, dstDescPtr); - } - // OpenCV dump (if testType is unit test and QA mode is not set) - if(!qaFlag) - write_image_batch_opencv(dst, outputu8, dstDescPtr, imageNamesStart, dstImgSizes, MAX_IMAGE_DUMP); + /*Compare the output of the function with golden outputs only if + 1.QA Flag is set + 2.input bit depth 0 (Input U8 && Output U8) + 3.source and destination layout are the same + 4.augmentation case does not generate random output*/ + if(qaFlag && inputBitDepth == 0 && ((srcDescPtr->layout == dstDescPtr->layout) || pln1OutTypeCase) && !(randomOutputCase)) + compare_output(outputu8, testCaseName, srcDescPtr, dstDescPtr, dstImgSizes, batchSize, interpolationTypeName, noiseTypeName, testCase, dst, scriptPath); + + // Calculate exact dstROI in XYWH format for OpenCV dump + if (roiTypeSrc == RpptRoiType::LTRB) + convert_roi(roiTensorPtrDst, RpptRoiType::XYWH, dstDescPtr->n); + + // Check if the ROI values for each input is within the bounds of the max buffer allocated + RpptROI roiDefault; + RpptROIPtr roiPtrDefault = &roiDefault; + roiPtrDefault->xywhROI = {0, 0, static_cast(dstDescPtr->w), static_cast(dstDescPtr->h)}; + for (int i = 0; i < dstDescPtr->n; i++) + { + roiTensorPtrDst[i].xywhROI.roiWidth = std::min(roiPtrDefault->xywhROI.roiWidth - roiTensorPtrDst[i].xywhROI.xy.x, roiTensorPtrDst[i].xywhROI.roiWidth); + roiTensorPtrDst[i].xywhROI.roiHeight = std::min(roiPtrDefault->xywhROI.roiHeight - roiTensorPtrDst[i].xywhROI.xy.y, roiTensorPtrDst[i].xywhROI.roiHeight); + roiTensorPtrDst[i].xywhROI.xy.x = std::max(roiPtrDefault->xywhROI.xy.x, roiTensorPtrDst[i].xywhROI.xy.x); + roiTensorPtrDst[i].xywhROI.xy.y = std::max(roiPtrDefault->xywhROI.xy.y, roiTensorPtrDst[i].xywhROI.xy.y); + } + + // Convert any PLN3 outputs to the corresponding PKD3 version for OpenCV dump + if (layoutType == 0 || layoutType == 1) + { + if ((dstDescPtr->c == 3) && (dstDescPtr->layout == RpptLayout::NCHW)) + convert_pln3_to_pkd3(outputu8, dstDescPtr); } + // OpenCV dump (if testType is unit test and QA mode is not set) + if(!qaFlag) + write_image_batch_opencv(dst, outputu8, dstDescPtr, imageNamesStart, dstImgSizes, MAX_IMAGE_DUMP); } } } diff --git a/utilities/test_suite/HIP/Tensor_voxel_hip.cpp b/utilities/test_suite/HIP/Tensor_voxel_hip.cpp index f9f3b9f54..749f51edf 100644 --- a/utilities/test_suite/HIP/Tensor_voxel_hip.cpp +++ b/utilities/test_suite/HIP/Tensor_voxel_hip.cpp @@ -154,8 +154,7 @@ int main(int argc, char * argv[]) // Run case-wise RPP API and measure time int missingFuncFlag = 0; - double startWallTime, endWallTime, wallTime; - double maxWallTime = 0, minWallTime = 5000, avgWallTime = 0; + double maxWallTime = 0, minWallTime = 5000, avgWallTime = 0, wallTime = 0; int noOfIterations = (int)noOfFiles / batchSize; string testCaseName; @@ -173,63 +172,64 @@ int main(int argc, char * argv[]) } printf("\nRunning %s %d times (each time with a batch size of %d images) and computing mean statistics...", funcName.c_str(), numRuns, batchSize); - for (int perfRunCount = 0; perfRunCount < numRuns; perfRunCount++) + for(int iterCount = 0; iterCount < noOfIterations; iterCount++) { - 
for(int iterCount = 0; iterCount < noOfIterations; iterCount++) - { - vector::const_iterator dataFilePathStart = dataFilePath.begin() + (iterCount * batchSize); - vector::const_iterator dataFilePathEnd = dataFilePathStart + batchSize; - nifti_1_header *niftiHeaderTemp = niftiHeader + batchSize * iterCount; + vector::const_iterator dataFilePathStart = dataFilePath.begin() + (iterCount * batchSize); + vector::const_iterator dataFilePathEnd = dataFilePathStart + batchSize; + nifti_1_header *niftiHeaderTemp = niftiHeader + batchSize * iterCount; - read_nifti_data(dataFilePathStart, dataFilePathEnd, niftiDataArray, niftiHeaderTemp); + read_nifti_data(dataFilePathStart, dataFilePathEnd, niftiDataArray, niftiHeaderTemp); - // optionally pick full image as ROI or a smaller slice of the 3D tensor in X/Y/Z dimensions - for(int i = 0; i < batchSize; i++) - { - // option 1 - test using roi as the whole 3D image - not sliced (example for 240 x 240 x 155 x 1) - roiGenericSrcPtr[i].xyzwhdROI.xyz.x = 0; // start X dim = 0 - roiGenericSrcPtr[i].xyzwhdROI.xyz.y = 0; // start Y dim = 0 - roiGenericSrcPtr[i].xyzwhdROI.xyz.z = 0; // start Z dim = 0 - roiGenericSrcPtr[i].xyzwhdROI.roiWidth = niftiHeaderTemp[i].dim[1]; // length in X dim - roiGenericSrcPtr[i].xyzwhdROI.roiHeight = niftiHeaderTemp[i].dim[2]; // length in Y dim - roiGenericSrcPtr[i].xyzwhdROI.roiDepth = niftiHeaderTemp[i].dim[3]; // length in Z dim - // option 2 - test using roi as a smaller 3D tensor slice - sliced in X, Y and Z dims (example for 240 x 240 x 155 x 1) - // roiGenericSrcPtr[i].xyzwhdROI.xyz.x = niftiHeader.dim[1] / 4; // start X dim = 60 - // roiGenericSrcPtr[i].xyzwhdROI.xyz.y = niftiHeader[i].dim[2] / 4; // start Y dim = 60 - // roiGenericSrcPtr[i].xyzwhdROI.xyz.z = niftiHeader[i].dim[3] / 3; // start Z dim = 51 - // roiGenericSrcPtr[i].xyzwhdROI.roiWidth = niftiHeader[i].dim[1] / 2; // length in X dim = 120 - // roiGenericSrcPtr[i].xyzwhdROI.roiHeight = niftiHeader[i].dim[2] / 2; // length in Y dim = 120 - // roiGenericSrcPtr[i].xyzwhdROI.roiDepth = niftiHeader[i].dim[3] / 3; // length in Z dim = 51 - // option 3 - test using roi as a smaller 3D tensor slice - sliced in only Z dim (example for 240 x 240 x 155 x 1) - // roiGenericSrcPtr[i].xyzwhdROI.xyz.x = 0; // start X dim = 0 - // roiGenericSrcPtr[i].xyzwhdROI.xyz.y = 0; // start Y dim = 0 - // roiGenericSrcPtr[i].xyzwhdROI.xyz.z = niftiHeader[i].dim[3] / 3; // start Z dim = 51 - // roiGenericSrcPtr[i].xyzwhdROI.roiWidth = niftiHeader[i].dim[1]; // length in X dim = 240 - // roiGenericSrcPtr[i].xyzwhdROI.roiHeight = niftiHeader[i].dim[2]; // length in Y dim = 240 - // roiGenericSrcPtr[i].xyzwhdROI.roiDepth = niftiHeader[i].dim[3] / 3; // length in Z dim = 51 - // option 4 - test using roi as a smaller 3D tensor slice - sliced in only X and Z dim (example for 240 x 240 x 155 x 1) - // roiGenericSrcPtr[i].xyzwhdROI.xyz.x = niftiHeader[i].dim[1] / 5; // start X dim = 48 - // roiGenericSrcPtr[i].xyzwhdROI.xyz.y = 0; // start Y dim = 0 - // roiGenericSrcPtr[i].xyzwhdROI.xyz.z = niftiHeader[i].dim[3] / 3; // start Z dim = 51 - // roiGenericSrcPtr[i].xyzwhdROI.roiWidth = niftiHeader[i].dim[1] * 3 / 5; // length in X dim = 144 - // roiGenericSrcPtr[i].xyzwhdROI.roiHeight = niftiHeader[i].dim[2]; // length in Y dim = 240 - // roiGenericSrcPtr[i].xyzwhdROI.roiDepth = niftiHeader[i].dim[3] / 3; // length in Z dim = 51 - } + // optionally pick full image as ROI or a smaller slice of the 3D tensor in X/Y/Z dimensions + for(int i = 0; i < batchSize; i++) + { + // option 1 - test using roi 
as the whole 3D image - not sliced (example for 240 x 240 x 155 x 1) + roiGenericSrcPtr[i].xyzwhdROI.xyz.x = 0; // start X dim = 0 + roiGenericSrcPtr[i].xyzwhdROI.xyz.y = 0; // start Y dim = 0 + roiGenericSrcPtr[i].xyzwhdROI.xyz.z = 0; // start Z dim = 0 + roiGenericSrcPtr[i].xyzwhdROI.roiWidth = niftiHeaderTemp[i].dim[1]; // length in X dim + roiGenericSrcPtr[i].xyzwhdROI.roiHeight = niftiHeaderTemp[i].dim[2]; // length in Y dim + roiGenericSrcPtr[i].xyzwhdROI.roiDepth = niftiHeaderTemp[i].dim[3]; // length in Z dim + // option 2 - test using roi as a smaller 3D tensor slice - sliced in X, Y and Z dims (example for 240 x 240 x 155 x 1) + // roiGenericSrcPtr[i].xyzwhdROI.xyz.x = niftiHeader.dim[1] / 4; // start X dim = 60 + // roiGenericSrcPtr[i].xyzwhdROI.xyz.y = niftiHeader[i].dim[2] / 4; // start Y dim = 60 + // roiGenericSrcPtr[i].xyzwhdROI.xyz.z = niftiHeader[i].dim[3] / 3; // start Z dim = 51 + // roiGenericSrcPtr[i].xyzwhdROI.roiWidth = niftiHeader[i].dim[1] / 2; // length in X dim = 120 + // roiGenericSrcPtr[i].xyzwhdROI.roiHeight = niftiHeader[i].dim[2] / 2; // length in Y dim = 120 + // roiGenericSrcPtr[i].xyzwhdROI.roiDepth = niftiHeader[i].dim[3] / 3; // length in Z dim = 51 + // option 3 - test using roi as a smaller 3D tensor slice - sliced in only Z dim (example for 240 x 240 x 155 x 1) + // roiGenericSrcPtr[i].xyzwhdROI.xyz.x = 0; // start X dim = 0 + // roiGenericSrcPtr[i].xyzwhdROI.xyz.y = 0; // start Y dim = 0 + // roiGenericSrcPtr[i].xyzwhdROI.xyz.z = niftiHeader[i].dim[3] / 3; // start Z dim = 51 + // roiGenericSrcPtr[i].xyzwhdROI.roiWidth = niftiHeader[i].dim[1]; // length in X dim = 240 + // roiGenericSrcPtr[i].xyzwhdROI.roiHeight = niftiHeader[i].dim[2]; // length in Y dim = 240 + // roiGenericSrcPtr[i].xyzwhdROI.roiDepth = niftiHeader[i].dim[3] / 3; // length in Z dim = 51 + // option 4 - test using roi as a smaller 3D tensor slice - sliced in only X and Z dim (example for 240 x 240 x 155 x 1) + // roiGenericSrcPtr[i].xyzwhdROI.xyz.x = niftiHeader[i].dim[1] / 5; // start X dim = 48 + // roiGenericSrcPtr[i].xyzwhdROI.xyz.y = 0; // start Y dim = 0 + // roiGenericSrcPtr[i].xyzwhdROI.xyz.z = niftiHeader[i].dim[3] / 3; // start Z dim = 51 + // roiGenericSrcPtr[i].xyzwhdROI.roiWidth = niftiHeader[i].dim[1] * 3 / 5; // length in X dim = 144 + // roiGenericSrcPtr[i].xyzwhdROI.roiHeight = niftiHeader[i].dim[2]; // length in Y dim = 240 + // roiGenericSrcPtr[i].xyzwhdROI.roiDepth = niftiHeader[i].dim[3] / 3; // length in Z dim = 51 + } - // Convert default NIFTI_DATATYPE unstrided buffer to RpptDataType::F32 strided buffer - convert_input_niftitype_to_Rpp32f_generic(niftiDataArray, niftiHeaderTemp, inputF32 , descriptorPtr3D); + // Convert default NIFTI_DATATYPE unstrided buffer to RpptDataType::F32 strided buffer + convert_input_niftitype_to_Rpp32f_generic(niftiDataArray, niftiHeaderTemp, inputF32 , descriptorPtr3D); - // Typecast input from F32 to U8 if input bitdepth requested is U8 - if (inputBitDepth == 0) - { - for(int i = 0; i < iBufferSizeU8; i++) - inputU8[i] = std::min(std::max(static_cast(inputF32[i]), static_cast(0)), static_cast(255)); - CHECK(hipMemcpy(d_inputU8, inputU8, iBufferSizeU8, hipMemcpyHostToDevice)); - } + // Typecast input from F32 to U8 if input bitdepth requested is U8 + if (inputBitDepth == 0) + { + for(int i = 0; i < iBufferSizeU8; i++) + inputU8[i] = std::min(std::max(static_cast(inputF32[i]), static_cast(0)), static_cast(255)); + CHECK(hipMemcpy(d_inputU8, inputU8, iBufferSizeU8, hipMemcpyHostToDevice)); + } - //Copy input buffer to hip - 
CHECK(hipMemcpy(d_inputF32, inputF32, iBufferSizeInBytes, hipMemcpyHostToDevice)); + //Copy input buffer to hip + CHECK(hipMemcpy(d_inputF32, inputF32, iBufferSizeInBytes, hipMemcpyHostToDevice)); + for (int perfRunCount = 0; perfRunCount < numRuns; perfRunCount++) + { + double startWallTime, endWallTime; switch (testCase) { case 0: @@ -327,108 +327,110 @@ int main(int argc, char * argv[]) maxWallTime = std::max(maxWallTime, wallTime); minWallTime = std::min(minWallTime, wallTime); avgWallTime += wallTime; - wallTime *= 1000; - if (missingFuncFlag == 1) + } + + wallTime *= 1000; + if (missingFuncFlag == 1) + { + printf("\nThe functionality doesn't yet exist in RPP\n"); + return -1; + } + + // Copy output buffer to host + CHECK(hipMemcpy(outputF32, d_outputF32, oBufferSizeInBytes, hipMemcpyDeviceToHost)); + if(testType == 0) + { + cout << "\n\nGPU Backend Wall Time: " << wallTime <<" ms per batch"<< endl; + if(DEBUG_MODE) { - printf("\nThe functionality doesn't yet exist in RPP\n"); - return -1; + std::ofstream refFile; + std::string refFileName; + if(layoutType == 0) + refFileName = testCaseName + "_nifti_hip_pkd3.csv"; + else if(layoutType == 1) + refFileName = testCaseName + "_nifti_hip_pln3.csv"; + else + refFileName = testCaseName + "_nifti_hip_pln1.csv"; + refFile.open(refFileName); + for (int i = 0; i < oBufferSize; i++) + refFile << *(outputF32 + i) << ","; + refFile.close(); } - // Copy output buffer to host - CHECK(hipMemcpy(outputF32, d_outputF32, oBufferSizeInBytes, hipMemcpyDeviceToHost)); - if(testType == 0) - { - cout << "\n\nGPU Backend Wall Time: " << wallTime <<" ms per nifti file"<< endl; - if(DEBUG_MODE) - { - std::ofstream refFile; - std::string refFileName; - if(layoutType == 0) - refFileName = testCaseName + "_nifti_hip_pkd3.csv"; - else if(layoutType == 1) - refFileName = testCaseName + "_nifti_hip_pln3.csv"; - else - refFileName = testCaseName + "_nifti_hip_pln1.csv"; - refFile.open(refFileName); - for (int i = 0; i < oBufferSize; i++) - refFile << *(outputF32 + i) << ","; - refFile.close(); - } - if(inputBitDepth == 0) - { - Rpp64u bufferLength = iBufferSize * sizeof(Rpp8u) + descriptorPtr3D->offsetInBytes; - CHECK(hipMemcpy(outputU8, d_outputU8, bufferLength, hipMemcpyDeviceToHost)); + if(inputBitDepth == 0) + { + Rpp64u bufferLength = iBufferSize * sizeof(Rpp8u) + descriptorPtr3D->offsetInBytes; + CHECK(hipMemcpy(outputU8, d_outputU8, bufferLength, hipMemcpyDeviceToHost)); - // Copy U8 buffer to F32 buffer for display purposes - for(int i = 0; i < bufferLength; i++) - outputF32[i] = static_cast(outputU8[i]); - } + // Copy U8 buffer to F32 buffer for display purposes + for(int i = 0; i < bufferLength; i++) + outputF32[i] = static_cast(outputU8[i]); + } - /*Compare the output of the function with golden outputs only if - 1.QA Flag is set - 2.input bit depth 2 (F32)*/ - if(qaFlag && inputBitDepth == 2) - compare_output(outputF32, oBufferSize, testCaseName, layoutType, descriptorPtr3D, (RpptRoiXyzwhd *)roiGenericSrcPtr, dstPath, scriptPath); - else + /*Compare the output of the function with golden outputs only if + 1.QA Flag is set + 2.input bit depth 2 (F32)*/ + if(qaFlag && inputBitDepth == 2) + compare_output(outputF32, oBufferSize, testCaseName, layoutType, descriptorPtr3D, (RpptRoiXyzwhd *)roiGenericSrcPtr, dstPath, scriptPath); + else + { + for(int batchCount = 0; batchCount < batchSize; batchCount++) { - for(int batchCount = 0; batchCount < batchSize; batchCount++) + int index = iterCount * batchSize + batchCount; + Rpp32f *outputTemp = outputF32 + batchCount 
* descriptorPtr3D->strides[0]; + for(int i = 0; i < numChannels; i++) // temporary changes to process pln3 { - int index = iterCount * batchSize + batchCount; - Rpp32f *outputTemp = outputF32 + batchCount * descriptorPtr3D->strides[0]; - for(int i = 0; i < numChannels; i++) // temporary changes to process pln3 + int xyFrameSize = niftiHeaderTemp[batchCount].dim[1] * niftiHeaderTemp[batchCount].dim[2]; + int xyFrameSizeROI = roiGenericSrcPtr[batchCount].xyzwhdROI.roiWidth * roiGenericSrcPtr[batchCount].xyzwhdROI.roiHeight; + + uint dataSize = niftiHeaderTemp[batchCount].dim[1] * niftiHeaderTemp[batchCount].dim[2] * niftiHeaderTemp[batchCount].dim[3]; + uchar *niftiDataU8 = (uchar *) malloc(dataSize * sizeof(uchar)); + uchar *outputBufferOpenCV = (uchar *)calloc(xyFrameSizeROI, sizeof(uchar)); + + // Convert RpptDataType::F32 strided buffer to default NIFTI_DATATYPE unstrided buffer + Rpp64u increment; + if (descriptorPtr3D->layout == RpptLayout::NCDHW) + increment = (Rpp64u)descriptorPtr3D->strides[1]; + else + increment = 1; + convert_output_Rpp32f_to_niftitype_generic(outputTemp + i * increment, descriptorPtr3D, niftiDataArray[batchCount], &niftiHeaderTemp[batchCount]); + NIFTI_DATATYPE min = niftiDataArray[batchCount][0]; + NIFTI_DATATYPE max = niftiDataArray[batchCount][0]; + for (int i = 0; i < dataSize; i++) + { + min = std::min(min, niftiDataArray[batchCount][i]); + max = std::max(max, niftiDataArray[batchCount][i]); + } + Rpp32f multiplier = 255.0f / (max - min); + for (int i = 0; i < dataSize; i++) + niftiDataU8[i] = (uchar)((niftiDataArray[batchCount][i] - min) * multiplier); + + uchar *niftiDataU8Temp = niftiDataU8; + for (int zPlane = roiGenericSrcPtr[batchCount].xyzwhdROI.xyz.z; zPlane < roiGenericSrcPtr[batchCount].xyzwhdROI.xyz.z + roiGenericSrcPtr[batchCount].xyzwhdROI.roiDepth; zPlane++) + { + write_image_from_nifti_opencv(niftiDataU8Temp, niftiHeaderTemp[batchCount].dim[1], (RpptRoiXyzwhd *)roiGenericSrcPtr, outputBufferOpenCV, zPlane, i, batchCount, dstPath, testCaseName, index); + niftiDataU8Temp += xyFrameSize; + } + + write_nifti_file(&niftiHeaderTemp[batchCount], niftiDataArray[batchCount], index, i, dstPath, testCaseName); + + if(i == 0) + { + std::string command = "convert -delay 10 -loop 0 " + std::string(dstPath) + "/" + testCaseName + "_nifti_" + std::to_string(index) + "_zPlane_chn_0_*.jpg " + std::string(dstPath) + "/" + testCaseName + "_niftiOutput_" + std::to_string(index) + "_chn_" + std::to_string(i) + ".gif"; + system(command.c_str()); + } + if(i == 1) + { + std::string command = "convert -delay 10 -loop 0 " + std::string(dstPath) + "/" + testCaseName + "_nifti_" + std::to_string(index) + "_zPlane_chn_1_*.jpg " + std::string(dstPath) + "/" + testCaseName + "_niftiOutput_" + std::to_string(index) + "_chn_" + std::to_string(i) + ".gif"; + system(command.c_str()); + } + if(i == 2) { - int xyFrameSize = niftiHeaderTemp[batchCount].dim[1] * niftiHeaderTemp[batchCount].dim[2]; - int xyFrameSizeROI = roiGenericSrcPtr[batchCount].xyzwhdROI.roiWidth * roiGenericSrcPtr[batchCount].xyzwhdROI.roiHeight; - - uint dataSize = niftiHeaderTemp[batchCount].dim[1] * niftiHeaderTemp[batchCount].dim[2] * niftiHeaderTemp[batchCount].dim[3]; - uchar *niftiDataU8 = (uchar *) malloc(dataSize * sizeof(uchar)); - uchar *outputBufferOpenCV = (uchar *)calloc(xyFrameSizeROI, sizeof(uchar)); - - // Convert RpptDataType::F32 strided buffer to default NIFTI_DATATYPE unstrided buffer - Rpp64u increment; - if (descriptorPtr3D->layout == RpptLayout::NCDHW) - increment = 
(Rpp64u)descriptorPtr3D->strides[1]; - else - increment = 1; - convert_output_Rpp32f_to_niftitype_generic(outputTemp + i * increment, descriptorPtr3D, niftiDataArray[batchCount], &niftiHeaderTemp[batchCount]); - NIFTI_DATATYPE min = niftiDataArray[batchCount][0]; - NIFTI_DATATYPE max = niftiDataArray[batchCount][0]; - for (int i = 0; i < dataSize; i++) - { - min = std::min(min, niftiDataArray[batchCount][i]); - max = std::max(max, niftiDataArray[batchCount][i]); - } - Rpp32f multiplier = 255.0f / (max - min); - for (int i = 0; i < dataSize; i++) - niftiDataU8[i] = (uchar)((niftiDataArray[batchCount][i] - min) * multiplier); - - uchar *niftiDataU8Temp = niftiDataU8; - for (int zPlane = roiGenericSrcPtr[batchCount].xyzwhdROI.xyz.z; zPlane < roiGenericSrcPtr[batchCount].xyzwhdROI.xyz.z + roiGenericSrcPtr[batchCount].xyzwhdROI.roiDepth; zPlane++) - { - write_image_from_nifti_opencv(niftiDataU8Temp, niftiHeaderTemp[batchCount].dim[1], (RpptRoiXyzwhd *)roiGenericSrcPtr, outputBufferOpenCV, zPlane, i, batchCount, dstPath, testCaseName, index); - niftiDataU8Temp += xyFrameSize; - } - - write_nifti_file(&niftiHeaderTemp[batchCount], niftiDataArray[batchCount], index, i, dstPath, testCaseName); - - if(i == 0) - { - std::string command = "convert -delay 10 -loop 0 " + std::string(dstPath) + "/" + testCaseName + "_nifti_" + std::to_string(index) + "_zPlane_chn_0_*.jpg " + std::string(dstPath) + "/" + testCaseName + "_niftiOutput_" + std::to_string(index) + "_chn_" + std::to_string(i) + ".gif"; - system(command.c_str()); - } - if(i == 1) - { - std::string command = "convert -delay 10 -loop 0 " + std::string(dstPath) + "/" + testCaseName + "_nifti_" + std::to_string(index) + "_zPlane_chn_1_*.jpg " + std::string(dstPath) + "/" + testCaseName + "_niftiOutput_" + std::to_string(index) + "_chn_" + std::to_string(i) + ".gif"; - system(command.c_str()); - } - if(i == 2) - { - std::string command = "convert -delay 10 -loop 0 " + std::string(dstPath) + "/" + testCaseName + "_nifti_" + std::to_string(index) + "_zPlane_chn_2_*.jpg " + std::string(dstPath) + "/" + testCaseName + "_niftiOutput_" + std::to_string(index) + "_chn_" + std::to_string(i) + ".gif"; - system(command.c_str()); - } - free(niftiDataU8); - free(outputBufferOpenCV); + std::string command = "convert -delay 10 -loop 0 " + std::string(dstPath) + "/" + testCaseName + "_nifti_" + std::to_string(index) + "_zPlane_chn_2_*.jpg " + std::string(dstPath) + "/" + testCaseName + "_niftiOutput_" + std::to_string(index) + "_chn_" + std::to_string(i) + ".gif"; + system(command.c_str()); } + free(niftiDataU8); + free(outputBufferOpenCV); } } } diff --git a/utilities/test_suite/HIP/runTests.py b/utilities/test_suite/HIP/runTests.py index 2e8054332..e5be154c4 100644 --- a/utilities/test_suite/HIP/runTests.py +++ b/utilities/test_suite/HIP/runTests.py @@ -21,13 +21,11 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ - import os -import subprocess # nosec -import argparse import sys -import datetime -import shutil +sys.dont_write_bytecode = True +sys.path.append(os.path.join(os.path.dirname( __file__ ), '..' 
)) +from common import * # Set the timestamp timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") @@ -39,109 +37,8 @@ qaInputFile = scriptPath + "/../TEST_IMAGES/three_images_mixed_src1" outFolderPath = os.getcwd() buildFolderPath = os.getcwd() - -# Checks if the folder path is empty, or is it a root folder, or if it exists, and remove its contents -def validate_and_remove_files(path): - if not path: # check if a string is empty - print("Folder path is empty.") - exit() - - elif path == "/*": # check if the root directory is passed to the function - print("Root folder cannot be deleted.") - exit() - - elif os.path.exists(path): # check if the folder exists - # Get a list of files and directories within the specified path - items = os.listdir(path) - - if items: - # The directory is not empty, delete its contents - for item in items: - item_path = os.path.join(path, item) - if os.path.isfile(item_path): - os.remove(item_path) - elif os.path.isdir(item_path): - shutil.rmtree(item_path) # Delete the directory if it exists - - else: - print("Path is invalid or does not exist.") - exit() - -# Check if the folder is the root folder or exists, and remove the specified subfolders -def validate_and_remove_folders(path, folder): - if path == "/*": # check if the root directory is passed to the function - print("Root folder cannot be deleted.") - exit() - if path and os.path.isdir(path): # checks if directory string is not empty and it exists - output_folders = [folder_name for folder_name in os.listdir(path) if folder_name.startswith(folder)] - - # Loop through each directory and delete it only if it exists - for folder_name in output_folders: - folder_path = os.path.join(path, folder_name) - if os.path.isdir(folder_path): - shutil.rmtree(folder_path) # Delete the directory if it exists - print("Deleted directory:", folder_path) - else: - print("Directory not found:", folder_path) - -# Check if a case file exists and filter its contents based on certain conditions -def case_file_check(CASE_FILE_PATH): - try: - case_file = open(CASE_FILE_PATH,'r') - for line in case_file: - print(line) - if not(line.startswith('"Name"')): - if TYPE in TENSOR_TYPE_LIST: - new_file.write(line) - d_counter[TYPE] = d_counter[TYPE] + 1 - case_file.close() - return True - except IOError: - print("Unable to open case results") - return False - - # Generate a directory name based on certain parameters -def directory_name_generator(qaMode, affinity, layoutType, case, path): - if qaMode == 0: - functionality_group = func_group_finder(int(case)) - dst_folder_temp = f"{path}/rpp_{affinity}_{layoutType}_{functionality_group}" - else: - dst_folder_temp = path - - return dst_folder_temp - -# Process the layout based on the given parameters and generate the directory name and log file layout. 
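Note on the helpers deleted in this hunk (validate_and_remove_files, validate_and_remove_folders, case_file_check, directory_name_generator, process_layout, validate_path, create_layout_directories, generate_performance_reports): they are now expected to come from the shared common module pulled in above via "from common import *". The new call sites later in this patch pass two extra arguments, a backend string ("hip") and the local func_group_finder, so the consolidated process_layout plausibly looks like the sketch below. This is only an illustration reconstructed from the removed code and the new call signature; the module path and the exact body are assumptions, not part of this patch.

# Hypothetical sketch of the consolidated helpers in the shared common module
# (e.g. utilities/test_suite/common.py); reconstructed from the per-backend
# copies removed in this patch and the new process_layout(...) call sites.

def directory_name_generator(qaMode, affinity, layoutType, case, path, func_group_finder):
    # QA mode writes everything to one folder; otherwise group output by functionality.
    if qaMode == 0:
        functionality_group = func_group_finder(int(case))
        return "{}/rpp_{}_{}_{}".format(path, affinity, layoutType, functionality_group)
    return path

def process_layout(layout, qaMode, case, dstPath, backend, func_group_finder):
    # Map the numeric layout index to its folder suffix and log-file tag.
    layoutMap = {0: "pkd3", 1: "pln3", 2: "pln1"}
    logFileLayout = layoutMap[layout]
    dstPathTemp = directory_name_generator(qaMode, backend, logFileLayout, case, dstPath, func_group_finder)
    return dstPathTemp, logFileLayout

Usage then matches the call sites in this patch, e.g. process_layout(layout, qaMode, case, dstPath, "hip", func_group_finder).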
-def process_layout(layout, qaMode, case, dstPath): - if layout == 0: - dstPathTemp = directory_name_generator(qaMode, "hip", "pkd3", case, dstPath) - log_file_layout = "pkd3" - elif layout == 1: - dstPathTemp = directory_name_generator(qaMode, "hip", "pln3", case, dstPath) - log_file_layout = "pln3" - elif layout == 2: - dstPathTemp = directory_name_generator(qaMode, "hip", "pln1", case, dstPath) - log_file_layout = "pln1" - - return dstPathTemp, log_file_layout - -# Validate if a path exists and is a directory -def validate_path(input_path): - if not os.path.exists(input_path): - raise ValueError("path " + input_path +" does not exist.") - if not os.path.isdir(input_path): - raise ValueError("path " + input_path + " is not a directory.") - -# Create layout directories within a destination path based on a layout dictionary -def create_layout_directories(dst_path, layout_dict): - for layout in range(3): - current_layout = layout_dict[layout] - try: - os.makedirs(dst_path + '/' + current_layout) - except FileExistsError: - pass - folder_list = [f for f in os.listdir(dst_path) if current_layout.lower() in f] - for folder in folder_list: - os.rename(dst_path + '/' + folder, dst_path + '/' + current_layout + '/' + folder) +caseMin = 0 +caseMax = 89 # Get a list of log files based on a flag for preserving output def get_log_file_list(preserveOutput): @@ -174,32 +71,20 @@ def func_group_finder(case_number): else: return "miscellaneous" -# Generate performance reports based on counters and a list of types -def generate_performance_reports(d_counter, TYPE_LIST): - import pandas as pd - pd.options.display.max_rows = None - # Generate performance report - for TYPE in TYPE_LIST: - print("\n\n\nKernels tested - ", d_counter[TYPE], "\n\n") - df = pd.read_csv(RESULTS_DIR + "/consolidated_results_" + TYPE + ".stats.csv") - df["AverageMs"] = df["AverageNs"] / 1000000 - dfPrint = df.drop(['Percentage'], axis = 1) - dfPrint["HIP Kernel Name"] = dfPrint.iloc[:,0].str.lstrip("Hip_") - dfPrint_noIndices = dfPrint.astype(str) - dfPrint_noIndices.replace(['0', '0.0'], '', inplace = True) - dfPrint_noIndices = dfPrint_noIndices.to_string(index = False) - print(dfPrint_noIndices) - def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layout, qaMode, decoderType, batchSize, roiList): print("\n\n\n\n") print("--------------------------------") print("Running a New Functionality...") print("--------------------------------") - - for bitDepth in range(7): + bitDepths = range(7) + outputFormatToggles = [0, 1] + if qaMode: + bitDepths = [0] + outputFormatToggles = [0] + for bitDepth in bitDepths: print("\n\n\nRunning New Bit Depth...\n-------------------------\n\n") - for outputFormatToggle in range(2): + for outputFormatToggle in outputFormatToggles: # There is no layout toggle for PLN1 case, so skip this case if layout == 2 and outputFormatToggle == 1: continue @@ -315,11 +200,11 @@ def rpp_test_suite_parser_and_validator(): parser = argparse.ArgumentParser() parser.add_argument("--input_path1", type = str, default = inFilePath1, help = "Path to the input folder 1") parser.add_argument("--input_path2", type = str, default = inFilePath2, help = "Path to the input folder 2") - parser.add_argument("--case_start", type = int, default = 0, help="Testing range starting case # - (0:90)") - parser.add_argument("--case_end", type = int, default = 90, help="Testing range ending case # - (0:90)") - parser.add_argument('--test_type', type = int, default = 0, help="Type of Test - (0 = Unit tests / 1 = 
Performance tests)") - parser.add_argument('--case_list', nargs = "+", help="List of case numbers to list", required=False) - parser.add_argument('--profiling', type = str , default='NO', help='Run with profiler? - (YES/NO)', required=False) + parser.add_argument("--case_start", type = int, default = caseMin, help = "Testing start case # - Range must be in [" + str(caseMin) + ":" + str(caseMax) + "]") + parser.add_argument("--case_end", type = int, default = caseMax, help = "Testing end case # - Range must be in [" + str(caseMin) + ":" + str(caseMax) + "]") + parser.add_argument('--test_type', type = int, default = 0, help = "Type of Test - (0 = Unit tests / 1 = Performance tests)") + parser.add_argument('--case_list', nargs = "+", help = "List of case numbers to list", required = False) + parser.add_argument('--profiling', type = str , default = 'NO', help = 'Run with profiler? - (YES/NO)', required = False) parser.add_argument('--qa_mode', type = int, default = 0, help = "Run with qa_mode? Output images from tests will be compared with golden outputs - (0 / 1)", required = False) parser.add_argument('--decoder_type', type = int, default = 0, help = "Type of Decoder to decode the input data - (0 = TurboJPEG / 1 = OpenCV)") parser.add_argument('--num_runs', type = int, default = 1, help = "Specifies the number of runs for running the performance tests") @@ -334,8 +219,8 @@ def rpp_test_suite_parser_and_validator(): validate_path(qaInputFile) # validate the parameters passed by user - if ((args.case_start < 0 or args.case_start > 90) or (args.case_end < 0 or args.case_end > 90)): - print("Starting case# and Ending case# must be in the 0:90 range. Aborting!") + if ((args.case_start < caseMin or args.case_start > caseMax) or (args.case_end < caseMin or args.case_end > caseMax)): + print(f"Starting case# and Ending case# must be in the {caseMin}:{caseMax} range. Aborting!") exit(0) elif args.case_end < args.case_start: print("Ending case# must be greater than starting case#. Aborting!") @@ -349,7 +234,7 @@ def rpp_test_suite_parser_and_validator(): elif args.decoder_type < 0 or args.decoder_type > 1: print("Decoder Type must be in the 0/1 (0 = OpenCV / 1 = TurboJPEG). Aborting") exit(0) - elif args.case_list is not None and args.case_start > 0 and args.case_end < 90: + elif args.case_list is not None and args.case_start > caseMin and args.case_end < caseMax: print("Invalid input! Please provide only 1 option between case_list, case_start and case_end") exit(0) elif args.num_runs <= 0: @@ -376,9 +261,9 @@ def rpp_test_suite_parser_and_validator(): args.case_list = [str(x) for x in args.case_list] else: for case in args.case_list: - if int(case) < 0 or int(case) > 90: - print("The case# must be in the 0:90 range!") - exit(0) + if int(case) < caseMin or int(case) > caseMax: + print(f"Invalid case number {case}! Case number must be in the {caseMin}:{caseMax} range. Aborting!") + exit(0) return args @@ -410,7 +295,7 @@ def rpp_test_suite_parser_and_validator(): numRuns = 1 elif(testType == 1): if "--num_runs" not in sys.argv: - numRuns = 1000 #default numRuns for running performance tests + numRuns = 100 #default numRuns for running performance tests outFilePath = outFolderPath + "/OUTPUT_PERFORMANCE_LOGS_HIP_" + timestamp else: print("Invalid TEST_TYPE specified. 
TEST_TYPE should be 0/1 (0 = Unittests / 1 = Performancetests)") @@ -438,6 +323,9 @@ def rpp_test_suite_parser_and_validator(): subprocess.run(["cmake", scriptPath], cwd=".") # nosec subprocess.run(["make", "-j16"], cwd=".") # nosec +# List of cases supported +supportedCaseList = ['0', '1', '2', '4', '8', '13', '20', '21', '23', '29', '30', '31', '34', '36', '37', '38', '39', '45', '54', '61', '63', '70', '80', '82', '83', '84', '85', '86', '87', '88', '89'] + # Create folders based on testType and profilingOption if testType == 1 and profilingOption == "YES": os.makedirs(f"{dstPath}/Tensor_PKD3") @@ -451,6 +339,8 @@ def rpp_test_suite_parser_and_validator(): if(testType == 0): for case in caseList: + if case not in supportedCaseList: + continue if case == "82" and (("--input_path1" not in sys.argv and "--input_path2" not in sys.argv) or qaMode == 1): srcPath1 = ricapInFilePath srcPath2 = ricapInFilePath @@ -458,11 +348,8 @@ def rpp_test_suite_parser_and_validator(): if qaMode == 1 and case != "82": srcPath1 = inFilePath1 srcPath2 = inFilePath2 - if int(case) < 0 or int(case) > 89: - print(f"Invalid case number {case}. Case number must be in the range of 0 to 89!") - continue for layout in range(3): - dstPathTemp, log_file_layout = process_layout(layout, qaMode, case, dstPath) + dstPathTemp, log_file_layout = process_layout(layout, qaMode, case, dstPath, "hip", func_group_finder) if qaMode == 0: if not os.path.isdir(dstPathTemp): @@ -476,14 +363,13 @@ def rpp_test_suite_parser_and_validator(): else: if (testType == 1 and profilingOption == "NO"): for case in caseList: - if int(case) < 0 or int(case) > 89: - print(f"Invalid case number {case}. Case number must be in the range of 0 to 89!") + if case not in supportedCaseList: continue if case == "82" and "--input_path1" not in sys.argv and "--input_path2" not in sys.argv: srcPath1 = ricapInFilePath srcPath2 = ricapInFilePath for layout in range(3): - dstPathTemp, log_file_layout = process_layout(layout, qaMode, case, dstPath) + dstPathTemp, log_file_layout = process_layout(layout, qaMode, case, dstPath, "hip", func_group_finder) run_performance_test(loggingFolder, log_file_layout, srcPath1, srcPath2, dstPath, case, numRuns, testType, layout, qaMode, decoderType, batchSize, roiList) @@ -491,14 +377,13 @@ def rpp_test_suite_parser_and_validator(): NEW_FUNC_GROUP_LIST = [0, 15, 20, 29, 36, 40, 42, 49, 56, 65, 69] for case in caseList: - if int(case) < 0 or int(case) > 89: - print(f"Invalid case number {case}. 
Case number must be in the range of 0 to 89!") + if case not in supportedCaseList: continue if case == "82" and "--input_path1" not in sys.argv and "--input_path2" not in sys.argv: srcPath1 = ricapInFilePath srcPath2 = ricapInFilePath for layout in range(3): - dstPathTemp, log_file_layout = process_layout(layout, qaMode, case, dstPath) + dstPathTemp, log_file_layout = process_layout(layout, qaMode, case, dstPath, "hip", func_group_finder) print("\n\n\n\n") print("--------------------------------") @@ -575,7 +460,7 @@ def rpp_test_suite_parser_and_validator(): # Write into csv file CASE_FILE_PATH = CASE_RESULTS_DIR + "/output_case" + str(CASE_NUM) + "_bitDepth" + str(BIT_DEPTH) + "_oft" + str(OFT) + "_kernelSize" + str(KSIZE) + ".stats.csv" print("CASE_FILE_PATH = " + CASE_FILE_PATH) - fileCheck = case_file_check(CASE_FILE_PATH) + fileCheck = case_file_check(CASE_FILE_PATH, TYPE, TENSOR_TYPE_LIST, new_file, d_counter) if fileCheck == False: continue elif (CASE_NUM == "24" or CASE_NUM == "21" or CASE_NUM == "23") and TYPE.startswith("Tensor"): @@ -585,7 +470,7 @@ def rpp_test_suite_parser_and_validator(): # Write into csv file CASE_FILE_PATH = CASE_RESULTS_DIR + "/output_case" + str(CASE_NUM) + "_bitDepth" + str(BIT_DEPTH) + "_oft" + str(OFT) + "_interpolationType" + str(INTERPOLATIONTYPE) + ".stats.csv" print("CASE_FILE_PATH = " + CASE_FILE_PATH) - fileCheck = case_file_check(CASE_FILE_PATH) + fileCheck = case_file_check(CASE_FILE_PATH, TYPE, TENSOR_TYPE_LIST, new_file, d_counter) if fileCheck == False: continue elif (CASE_NUM == "8") and TYPE.startswith("Tensor"): @@ -595,21 +480,21 @@ def rpp_test_suite_parser_and_validator(): # Write into csv file CASE_FILE_PATH = CASE_RESULTS_DIR + "/output_case" + str(CASE_NUM) + "_bitDepth" + str(BIT_DEPTH) + "_oft" + str(OFT) + "_noiseType" + str(NOISETYPE) + ".stats.csv" print("CASE_FILE_PATH = " + CASE_FILE_PATH) - fileCheck = case_file_check(CASE_FILE_PATH) + fileCheck = case_file_check(CASE_FILE_PATH, TYPE, TENSOR_TYPE_LIST, new_file, d_counter) if fileCheck == False: continue else: # Write into csv file CASE_FILE_PATH = CASE_RESULTS_DIR + "/output_case" + str(CASE_NUM) + "_bitDepth" + str(BIT_DEPTH) + "_oft" + str(OFT) + ".stats.csv" print("CASE_FILE_PATH = " + CASE_FILE_PATH) - fileCheck = case_file_check(CASE_FILE_PATH) + fileCheck = case_file_check(CASE_FILE_PATH, TYPE, TENSOR_TYPE_LIST, new_file, d_counter) if fileCheck == False: continue new_file.close() subprocess.call(['chown', '{}:{}'.format(os.getuid(), os.getgid()), RESULTS_DIR + "/consolidated_results_" + TYPE + ".stats.csv"]) # nosec try: - generate_performance_reports(d_counter, TYPE_LIST) + generate_performance_reports(d_counter, TYPE_LIST, RESULTS_DIR) except ImportError: print("\nPandas not available! 
Results of GPU profiling experiment are available in the following files:\n" + \ @@ -634,91 +519,14 @@ def rpp_test_suite_parser_and_validator(): "statistical_operations" ] for log_file in log_file_list: - # Opening log file - try: - f = open(log_file,"r") - print("\n\n\nOpened log file -> " + log_file) - except IOError: - print("Skipping file -> " + log_file) - continue - - stats = [] - maxVals = [] - minVals = [] - avgVals = [] - functions = [] - frames = [] - prevLine = "" - funcCount = 0 - - # Loop over each line - for line in f: - for functionality_group in functionality_group_list: - if functionality_group in line: - functions.extend([" ", functionality_group, " "]) - frames.extend([" ", " ", " "]) - maxVals.extend([" ", " ", " "]) - minVals.extend([" ", " ", " "]) - avgVals.extend([" ", " ", " "]) - - if "max,min,avg wall times in ms/batch" in line: - split_word_start = "Running " - split_word_end = " "+ str(numRuns) - prevLine = prevLine.partition(split_word_start)[2].partition(split_word_end)[0] - if prevLine not in functions: - functions.append(prevLine) - frames.append(str(numRuns)) - split_word_start = "max,min,avg wall times in ms/batch = " - split_word_end = "\n" - stats = line.partition(split_word_start)[2].partition(split_word_end)[0].split(",") - maxVals.append(stats[0]) - minVals.append(stats[1]) - avgVals.append(stats[2]) - funcCount += 1 - - if line != "\n": - prevLine = line - - # Print log lengths - print("Functionalities - " + str(funcCount)) - - # Print summary of log - print("\n\nFunctionality\t\t\t\t\t\tFrames Count\tmax(ms/batch)\t\tmin(ms/batch)\t\tavg(ms/batch)\n") - if len(functions) != 0: - maxCharLength = len(max(functions, key = len)) - functions = [x + (' ' * (maxCharLength - len(x))) for x in functions] - for i, func in enumerate(functions): - print(func + "\t" + str(frames[i]) + "\t\t" + str(maxVals[i]) + "\t" + str(minVals[i]) + "\t" + str(avgVals[i])) - else: - print("No variants under this category") - - # Closing log file - f.close() + print_performance_tests_summary(log_file, functionality_group_list, numRuns) # print the results of qa tests -supportedCaseList = ['0', '1', '2', '4', '8', '13', '20', '21', '23', '29', '30', '31', '34', '36', '37', '38', '39', '45', '54', '61', '63', '70', '80', '82', '83', '84', '85', '86', '87', '88', '89'] nonQACaseList = ['8', '24', '54', '84'] # Add cases present in supportedCaseList, but without QA support if qaMode and testType == 0: qaFilePath = os.path.join(outFilePath, "QA_results.txt") checkFile = os.path.isfile(qaFilePath) if checkFile: - f = open(qaFilePath, 'r+') print("---------------------------------- Results of QA Test - Tensor_hip ----------------------------------\n") - numLines = 0 - numPassed = 0 - for line in f: - sys.stdout.write(line) - numLines += 1 - if "PASSED" in line: - numPassed += 1 - sys.stdout.flush() - resultsInfo = "\n\nFinal Results of Tests:" - resultsInfo += "\n - Total test cases including all subvariants REQUESTED = " + str(numLines) - resultsInfo += "\n - Total test cases including all subvariants PASSED = " + str(numPassed) - resultsInfo += "\n\nGeneral information on Tensor test suite availability:" - resultsInfo += "\n - Total augmentations supported in Tensor test suite = " + str(len(supportedCaseList)) - resultsInfo += "\n - Total augmentations with golden output QA test support = " + str(len(supportedCaseList) - len(nonQACaseList)) - resultsInfo += "\n - Total augmentations without golden ouput QA test support (due to randomization involved) = " + str(len(nonQACaseList)) 
- f.write(resultsInfo) - print("\n-------------------------------------------------------------------" + resultsInfo + "\n\n-------------------------------------------------------------------") + print_qa_tests_summary(qaFilePath, supportedCaseList, nonQACaseList) diff --git a/utilities/test_suite/HIP/runTests_voxel.py b/utilities/test_suite/HIP/runTests_voxel.py index d645b03fe..4ae869b75 100644 --- a/utilities/test_suite/HIP/runTests_voxel.py +++ b/utilities/test_suite/HIP/runTests_voxel.py @@ -23,11 +23,10 @@ """ import os -import subprocess # nosec -import argparse import sys -import datetime -import shutil +sys.dont_write_bytecode = True +sys.path.append(os.path.join(os.path.dirname( __file__ ), '..' )) +from common import * # Set the timestamp timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") @@ -41,105 +40,6 @@ caseMin = 0 caseMax = 5 -# Check if folder path is empty, if it is the root folder, or if it exists, and remove its contents -def validate_and_remove_contents(path): - if not path: # check if a string is empty - print("Folder path is empty.") - exit() - elif path == "/*": # check if the root directory is passed to the function - print("Root folder cannot be deleted.") - exit() - elif os.path.exists(path): # check if the folder exists - # Get a list of files and directories within the specified path - items = os.listdir(path) - - if items: - # The directory is not empty, delete its contents - for item in items: - item_path = os.path.join(path, item) - if os.path.isfile(item_path): - os.remove(item_path) - elif os.path.isdir(item_path): - shutil.rmtree(item_path) # Delete the directory if it exists - else: - print("Path is invalid or does not exist.") - exit() - -# Check if the folder is the root folder or exists, and remove the specified subfolders -def validate_and_remove_folders(path, folder): - if path == "/*": # check if the root directory is passed to the function - print("Root folder cannot be deleted.") - exit() - if path and os.path.isdir(path): # checks if directory string is not empty and it exists - output_folders = [folder_name for folder_name in os.listdir(path) if folder_name.startswith(folder)] - - # Loop through each directory and delete it only if it exists - for folder_name in output_folders: - folder_path = os.path.join(path, folder_name) - if os.path.isdir(folder_path): - shutil.rmtree(folder_path) # Delete the directory if it exists - print("Deleted directory:", folder_path) - else: - print("Directory not found:", folder_path) - -def case_file_check(CASE_FILE_PATH, TYPE, TENSOR_TYPE_LIST, new_file): - try: - case_file = open(CASE_FILE_PATH,'r') - for line in case_file: - print(line) - if not(line.startswith('"Name"')): - if TYPE in TENSOR_TYPE_LIST: - new_file.write(line) - d_counter[TYPE] = d_counter[TYPE] + 1 - case_file.close() - return True - except IOError: - print("Unable to open case results") - return False - -# Generate a directory name based on certain parameters -def directory_name_generator(qaMode, affinity, layoutType, case, path): - if qaMode == 0: - functionality_group = func_group_finder(int(case)) - dst_folder_temp = "{}/rpp_{}_{}_{}".format(path, affinity, layoutType, functionality_group) - else: - dst_folder_temp = path - - return dst_folder_temp - -# Process the layout based on the given parameters and generate the directory name and log file layout. 
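The QA-summary block removed from runTests.py above (and from runTests_voxel.py below) is replaced by a single print_qa_tests_summary(qaFilePath, supportedCaseList, nonQACaseList) call. Only the call signature is visible in this patch, so the following is a sketch of what the shared helper presumably does, reconstructed from the deleted lines; the exact body in the common module may differ.

# Hypothetical sketch of print_qa_tests_summary in the shared common module,
# reconstructed from the QA-summary code this patch deletes from both runners.
import sys

def print_qa_tests_summary(qaFilePath, supportedCaseList, nonQACaseList):
    numLines = 0
    numPassed = 0
    with open(qaFilePath, 'r+') as f:
        for line in f:
            sys.stdout.write(line)      # echo every QA result line
            numLines += 1
            if "PASSED" in line:
                numPassed += 1
        sys.stdout.flush()
        resultsInfo = "\n\nFinal Results of Tests:"
        resultsInfo += "\n    - Total test cases including all subvariants REQUESTED = " + str(numLines)
        resultsInfo += "\n    - Total test cases including all subvariants PASSED = " + str(numPassed)
        resultsInfo += "\n\nGeneral information on Tensor test suite availability:"
        resultsInfo += "\n    - Total augmentations supported in Tensor test suite = " + str(len(supportedCaseList))
        resultsInfo += "\n    - Total augmentations with golden output QA test support = " + str(len(supportedCaseList) - len(nonQACaseList))
        resultsInfo += "\n    - Total augmentations without golden output QA test support (due to randomization involved) = " + str(len(nonQACaseList))
        f.write(resultsInfo)            # append the summary back into QA_results.txt
    print("\n-------------------------------------------------------------------" + resultsInfo + "\n\n-------------------------------------------------------------------")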
-def process_layout(layout, qaMode, case, dstPath): - if layout == 0: - dstPathTemp = directory_name_generator(qaMode, "hip", "pkd3", case, dstPath) - logFileLayout = "pkd3" - elif layout == 1: - dstPathTemp = directory_name_generator(qaMode, "hip", "pln3", case, dstPath) - logFileLayout = "pln3" - elif layout == 2: - dstPathTemp = directory_name_generator(qaMode, "hip", "pln1", case, dstPath) - logFileLayout = "pln1" - - return dstPathTemp, logFileLayout - -# Validate if a path exists and is a directory -def validate_path(input_path): - if not os.path.exists(input_path): - raise ValueError("path " + input_path +" does not exist.") - if not os.path.isdir(input_path): - raise ValueError("path " + input_path + " is not a directory.") - -# Create layout directories within a destination path based on a layout dictionary -def create_layout_directories(dst_path, layout_dict): - for layout in range(3): - current_layout = layout_dict[layout] - try: - os.makedirs(dst_path + '/' + current_layout) - except FileExistsError: - pass - folder_list = [f for f in os.listdir(dst_path) if current_layout.lower() in f] - for folder in folder_list: - os.rename(dst_path + '/' + folder, dst_path + '/' + current_layout + '/' + folder) - def get_log_file_list(preserveOutput): return [ outFolderPath + "/OUTPUT_PERFORMANCE_LOGS_HIP_VOXEL_" + timestamp + "/Tensor_voxel_hip_pkd3_raw_performance_log.txt", @@ -156,28 +56,14 @@ def func_group_finder(case_number): else: return "miscellaneous" -# Generate performance reports based on counters and a list of types -def generate_performance_reports(d_counter, TYPE_LIST): - import pandas as pd - pd.options.display.max_rows = None - # Generate performance report - for TYPE in TYPE_LIST: - print("\n\n\nKernels tested - ", d_counter[TYPE], "\n\n") - df = pd.read_csv(RESULTS_DIR + "/consolidated_results_" + TYPE + ".stats.csv") - df["AverageMs"] = df["AverageNs"] / 1000000 - dfPrint = df.drop(['Percentage'], axis = 1) - dfPrint["HIP Kernel Name"] = dfPrint.iloc[:,0].str.lstrip("Hip_") - dfPrint_noIndices = dfPrint.astype(str) - dfPrint_noIndices.replace(['0', '0.0'], '', inplace = True) - dfPrint_noIndices = dfPrint_noIndices.to_string(index = False) - print(dfPrint_noIndices) - def run_unit_test(headerPath, dataPath, dstPathTemp, layout, case, numRuns, testType, qaMode, batchSize): print("\n\n\n\n") print("--------------------------------") print("Running a New Functionality...") print("--------------------------------") bitDepths = [0, 2] + if qaMode: + bitDepths = [2] for bitDepth in bitDepths: print("\n\n\nRunning New Bit Depth...\n-------------------------\n\n") print(f"./Tensor_voxel_hip {headerPath} {dataPath} {dstPathTemp} {layout} {case} {numRuns} {testType} {qaMode} {batchSize} {bitDepth}") @@ -365,7 +251,7 @@ def rpp_test_suite_parser_and_validator(): dstPath = outFilePath # Validate DST_FOLDER -validate_and_remove_contents(dstPath) +validate_and_remove_files(dstPath) # Enable extglob if os.path.exists(buildFolderPath + "/build"): @@ -377,6 +263,9 @@ def rpp_test_suite_parser_and_validator(): subprocess.run(["cmake", scriptPath], cwd=".") # nosec subprocess.run(["make", "-j16"], cwd=".") # nosec +# List of cases supported +supportedCaseList = ['0', '1', '2', '3', '5'] + # Create folders based on testType and profilingOption if testType == 1 and profilingOption == "YES": os.makedirs(f"{dstPath}/Tensor_PKD3") @@ -390,8 +279,10 @@ def rpp_test_suite_parser_and_validator(): if testType == 0: for case in caseList: + if case not in supportedCaseList: + continue for layout in 
range(3): - dstPathTemp, logFileLayout = process_layout(layout, qaMode, case, dstPath) + dstPathTemp, logFileLayout = process_layout(layout, qaMode, case, dstPath, "hip", func_group_finder) if qaMode == 0: if not os.path.isdir(dstPathTemp): os.mkdir(dstPathTemp) @@ -399,14 +290,18 @@ def rpp_test_suite_parser_and_validator(): run_unit_test(headerPath, dataPath, dstPathTemp, layout, case, numRuns, testType, qaMode, batchSize) elif (testType == 1 and profilingOption == "NO"): for case in caseList: + if case not in supportedCaseList: + continue for layout in range(3): - dstPathTemp, logFileLayout = process_layout(layout, qaMode, case, dstPath) + dstPathTemp, logFileLayout = process_layout(layout, qaMode, case, dstPath, "hip", func_group_finder) run_performance_test(loggingFolder, logFileLayout, headerPath, dataPath, dstPathTemp, layout, case, numRuns, testType, qaMode, batchSize) elif (testType == 1 and profilingOption == "YES"): NEW_FUNC_GROUP_LIST = [0, 1] for case in caseList: + if case not in supportedCaseList: + continue for layout in range(3): - dstPathTemp, logFileLayout = process_layout(layout, qaMode, case, dstPath) + dstPathTemp, logFileLayout = process_layout(layout, qaMode, case, dstPath, "hip", func_group_finder) run_performance_test_with_profiler(loggingFolder, logFileLayout, dstPath, headerPath, dataPath, dstPathTemp, layout, case, numRuns, testType, qaMode, batchSize) RESULTS_DIR = "" @@ -451,14 +346,14 @@ def rpp_test_suite_parser_and_validator(): # Write into csv file CASE_FILE_PATH = CASE_RESULTS_DIR + "/output_case" + str(CASE_NUM) + ".stats.csv" print("CASE_FILE_PATH = " + CASE_FILE_PATH) - fileCheck = case_file_check(CASE_FILE_PATH, TYPE, TENSOR_TYPE_LIST, new_file) + fileCheck = case_file_check(CASE_FILE_PATH, TYPE, TENSOR_TYPE_LIST, new_file, d_counter) if fileCheck == False: continue new_file.close() subprocess.call(['chown', '{}:{}'.format(os.getuid(), os.getgid()), RESULTS_DIR + "/consolidated_results_" + TYPE + ".stats.csv"]) # nosec try: - generate_performance_reports(d_counter, TYPE_LIST) + generate_performance_reports(d_counter, TYPE_LIST, RESULTS_DIR) except ImportError: print("\nPandas not available! 
Results of GPU profiling experiment are available in the following files:\n" + \ @@ -470,32 +365,14 @@ def rpp_test_suite_parser_and_validator(): print("Unable to open results in " + RESULTS_DIR + "/consolidated_results_" + TYPE + ".stats.csv") # print the results of qa tests -supportedCaseList = ['0', '1', '2', '3', '5'] nonQACaseList = [] # Add cases present in supportedCaseList, but without QA support if qaMode and testType == 0: qaFilePath = os.path.join(outFilePath, "QA_results.txt") checkFile = os.path.isfile(qaFilePath) if checkFile: - f = open(qaFilePath, 'r+') print("---------------------------------- Results of QA Test - Tensor_voxel_hip ----------------------------------\n") - numLines = 0 - numPassed = 0 - for line in f: - sys.stdout.write(line) - numLines += 1 - if "PASSED" in line: - numPassed += 1 - sys.stdout.flush() - resultsInfo = "\n\nFinal Results of Tests:" - resultsInfo += "\n - Total test cases including all subvariants REQUESTED = " + str(numLines) - resultsInfo += "\n - Total test cases including all subvariants PASSED = " + str(numPassed) - resultsInfo += "\n\nGeneral information on Tensor voxel test suite availability:" - resultsInfo += "\n - Total augmentations supported in Tensor test suite = " + str(len(supportedCaseList)) - resultsInfo += "\n - Total augmentations with golden output QA test support = " + str(len(supportedCaseList) - len(nonQACaseList)) - resultsInfo += "\n - Total augmentations without golden ouput QA test support (due to randomization involved) = " + str(len(nonQACaseList)) - f.write(resultsInfo) - print("\n-------------------------------------------------------------------" + resultsInfo + "\n\n-------------------------------------------------------------------") + print_qa_tests_summary(qaFilePath, supportedCaseList, nonQACaseList) layoutDict = {0:"PKD3", 1:"PLN3", 2:"PLN1"} if (testType == 0 and qaMode == 0): # Unit tests @@ -509,63 +386,4 @@ def rpp_test_suite_parser_and_validator(): ] for log_file in log_file_list: - # Opening log file - try: - f = open(log_file,"r") - print("\n\n\nOpened log file -> "+ log_file) - except IOError: - print("Skipping file -> "+ log_file) - continue - - stats = [] - maxVals = [] - minVals = [] - avgVals = [] - functions = [] - frames = [] - prevLine = "" - funcCount = 0 - - # Loop over each line - for line in f: - for functionality_group in functionality_group_list: - if functionality_group in line: - functions.extend([" ", functionality_group, " "]) - frames.extend([" ", " ", " "]) - maxVals.extend([" ", " ", " "]) - minVals.extend([" ", " ", " "]) - avgVals.extend([" ", " ", " "]) - - if "max,min,avg wall times in ms/batch" in line: - split_word_start = "Running " - split_word_end = " " +str(numRuns) - prevLine = prevLine.partition(split_word_start)[2].partition(split_word_end)[0] - if prevLine not in functions: - functions.append(prevLine) - frames.append(numRuns) - split_word_start = "max,min,avg wall times in ms/batch = " - split_word_end = "\n" - stats = line.partition(split_word_start)[2].partition(split_word_end)[0].split(",") - maxVals.append(stats[0]) - minVals.append(stats[1]) - avgVals.append(stats[2]) - funcCount += 1 - - if line != "\n": - prevLine = line - - # Print log lengths - print("Functionalities - "+ str(funcCount)) - - # Print summary of log - print("\n\nFunctionality\t\t\t\t\t\tFrames Count\t\tmax(ms/batch)\t\tmin(ms/batch)\t\tavg(ms/batch)\n") - if len(functions) != 0: - maxCharLength = len(max(functions, key = len)) - functions = [x + (' ' * (maxCharLength - len(x))) for x in 
functions] - for i, func in enumerate(functions): - print(func + "\t\t\t\t\t\t\t\t" + str(frames[i]) + "\t\t" + str(maxVals[i]) + "\t\t" + str(minVals[i]) + "\t\t" + str(avgVals[i])) - else: - print("No variants under this category") - - # Closing log file - f.close() \ No newline at end of file + print_performance_tests_summary(log_file, functionality_group_list, numRuns) diff --git a/utilities/test_suite/HOST/Tensor_host.cpp b/utilities/test_suite/HOST/Tensor_host.cpp index b698a2def..e93eb143a 100644 --- a/utilities/test_suite/HOST/Tensor_host.cpp +++ b/utilities/test_suite/HOST/Tensor_host.cpp @@ -347,73 +347,73 @@ int main(int argc, char **argv) // case-wise RPP API and measure time script for Unit and Performance test printf("\nRunning %s %d times (each time with a batch size of %d images) and computing mean statistics...", func.c_str(), numRuns, batchSize); - for (int perfRunCount = 0; perfRunCount < numRuns; perfRunCount++) + for(int iterCount = 0; iterCount < noOfIterations; iterCount++) { - for(int iterCount = 0; iterCount < noOfIterations; iterCount++) - { - vector::const_iterator imagesPathStart = imageNamesPath.begin() + (iterCount * batchSize); - vector::const_iterator imagesPathEnd = imagesPathStart + batchSize; - vector::const_iterator imageNamesStart = imageNames.begin() + (iterCount * batchSize); - vector::const_iterator imageNamesEnd = imageNamesStart + batchSize; - vector::const_iterator imagesPathSecondStart = imageNamesPathSecond.begin() + (iterCount * batchSize); - vector::const_iterator imagesPathSecondEnd = imagesPathSecondStart + batchSize; - - // Set ROIs for src/dst - set_src_and_dst_roi(imagesPathStart, imagesPathEnd, roiTensorPtrSrc, roiTensorPtrDst, dstImgSizes); + vector::const_iterator imagesPathStart = imageNamesPath.begin() + (iterCount * batchSize); + vector::const_iterator imagesPathEnd = imagesPathStart + batchSize; + vector::const_iterator imageNamesStart = imageNames.begin() + (iterCount * batchSize); + vector::const_iterator imageNamesEnd = imageNamesStart + batchSize; + vector::const_iterator imagesPathSecondStart = imageNamesPathSecond.begin() + (iterCount * batchSize); + vector::const_iterator imagesPathSecondEnd = imagesPathSecondStart + batchSize; + + // Set ROIs for src/dst + set_src_and_dst_roi(imagesPathStart, imagesPathEnd, roiTensorPtrSrc, roiTensorPtrDst, dstImgSizes); + + //Read images + if(decoderType == 0) + read_image_batch_turbojpeg(inputu8, srcDescPtr, imagesPathStart); + else + read_image_batch_opencv(inputu8, srcDescPtr, imagesPathStart); + + // if the input layout requested is PLN3, convert PKD3 inputs to PLN3 for first and second input batch + if (layoutType == 1) + convert_pkd3_to_pln3(inputu8, srcDescPtr); - //Read images + if(dualInputCase) + { if(decoderType == 0) - read_image_batch_turbojpeg(inputu8, srcDescPtr, imagesPathStart); + read_image_batch_turbojpeg(inputu8Second, srcDescPtr, imagesPathSecondStart); else - read_image_batch_opencv(inputu8, srcDescPtr, imagesPathStart); - - // if the input layout requested is PLN3, convert PKD3 inputs to PLN3 for first and second input batch + read_image_batch_opencv(inputu8Second, srcDescPtr, imagesPathSecondStart); if (layoutType == 1) - convert_pkd3_to_pln3(inputu8, srcDescPtr); - - if(dualInputCase) - { - if(decoderType == 0) - read_image_batch_turbojpeg(inputu8Second, srcDescPtr, imagesPathSecondStart); - else - read_image_batch_opencv(inputu8Second, srcDescPtr, imagesPathSecondStart); - if (layoutType == 1) - convert_pkd3_to_pln3(inputu8Second, srcDescPtr); - } + 
convert_pkd3_to_pln3(inputu8Second, srcDescPtr); + } - // Convert inputs to correponding bit depth specified by user - convert_input_bitdepth(input, input_second, inputu8, inputu8Second, inputBitDepth, ioBufferSize, inputBufferSize, srcDescPtr, dualInputCase, conversionFactor); + // Convert inputs to correponding bit depth specified by user + convert_input_bitdepth(input, input_second, inputu8, inputu8Second, inputBitDepth, ioBufferSize, inputBufferSize, srcDescPtr, dualInputCase, conversionFactor); - int roiHeightList[batchSize], roiWidthList[batchSize]; - if(roiList[0] == 0 && roiList[1] == 0 && roiList[2] == 0 && roiList[3] == 0) + int roiHeightList[batchSize], roiWidthList[batchSize]; + if(roiList[0] == 0 && roiList[1] == 0 && roiList[2] == 0 && roiList[3] == 0) + { + for(int i = 0; i < batchSize ; i++) { - for(int i = 0; i < batchSize ; i++) - { - roiList[0] = 10; - roiList[1] = 10; - roiWidthList[i] = roiTensorPtrSrc[i].xywhROI.roiWidth / 2; - roiHeightList[i] = roiTensorPtrSrc[i].xywhROI.roiHeight / 2; - } + roiList[0] = 10; + roiList[1] = 10; + roiWidthList[i] = roiTensorPtrSrc[i].xywhROI.roiWidth / 2; + roiHeightList[i] = roiTensorPtrSrc[i].xywhROI.roiHeight / 2; } - else + } + else + { + for(int i = 0; i < batchSize ; i++) { - for(int i = 0; i < batchSize ; i++) - { - roiWidthList[i] = roiList[2]; - roiHeightList[i] = roiList[3]; - } + roiWidthList[i] = roiList[2]; + roiHeightList[i] = roiList[3]; } + } - // Uncomment to run test case with an xywhROI override - // roi.xywhROI = {0, 0, 25, 25}; - // set_roi_values(&roi, roiTensorPtrSrc, roiTypeSrc, batchSize); - // update_dst_sizes_with_roi(roiTensorPtrSrc, dstImgSizes, roiTypeSrc, batchSize); + // Uncomment to run test case with an xywhROI override + // roi.xywhROI = {0, 0, 25, 25}; + // set_roi_values(&roi, roiTensorPtrSrc, roiTypeSrc, batchSize); + // update_dst_sizes_with_roi(roiTensorPtrSrc, dstImgSizes, roiTypeSrc, batchSize); - // Uncomment to run test case with an ltrbROI override - // roiTypeSrc = RpptRoiType::LTRB; - // convert_roi(roiTensorPtrSrc, roiTypeSrc, batchSize); - // update_dst_sizes_with_roi(roiTensorPtrSrc, dstImgSizes, roiTypeSrc, batchSize); + // Uncomment to run test case with an ltrbROI override + // roiTypeSrc = RpptRoiType::LTRB; + // convert_roi(roiTensorPtrSrc, roiTypeSrc, batchSize); + // update_dst_sizes_with_roi(roiTensorPtrSrc, dstImgSizes, roiTypeSrc, batchSize); + for (int perfRunCount = 0; perfRunCount < numRuns; perfRunCount++) + { clock_t startCpuTime, endCpuTime; double startWallTime, endWallTime; switch (testCase) @@ -1111,119 +1111,119 @@ int main(int argc, char **argv) maxWallTime = std::max(maxWallTime, wallTime); minWallTime = std::min(minWallTime, wallTime); avgWallTime += wallTime; - cpuTime *= 1000; - wallTime *= 1000; + } + cpuTime *= 1000; + wallTime *= 1000; - if (testType == 0) - { - cout <<"\n\n"; - cout <<"CPU Backend Clock Time: "<< cpuTime <<" ms/batch"<< endl; - cout <<"CPU Backend Wall Time: "<< wallTime <<" ms/batch"<< endl; + if (testType == 0) + { + cout <<"\n\n"; + cout <<"CPU Backend Clock Time: "<< cpuTime <<" ms/batch"<< endl; + cout <<"CPU Backend Wall Time: "<< wallTime <<" ms/batch"<< endl; - if (reductionTypeCase) + if (reductionTypeCase) + { + if(srcDescPtr->c == 3) + printf("\nReduction result (Batch of 3 channel images produces 4 results per image in batch): "); + else if(srcDescPtr->c == 1) { - if(srcDescPtr->c == 3) - printf("\nReduction result (Batch of 3 channel images produces 4 results per image in batch): "); - else if(srcDescPtr->c == 1) - { - 
printf("\nReduction result (Batch of 1 channel images produces 1 result per image in batch): "); - reductionFuncResultArrLength = srcDescPtr->n; - } - - // print reduction functions output array based on different bit depths, and precision desired - int precision = ((dstDescPtr->dataType == RpptDataType::F32) || (dstDescPtr->dataType == RpptDataType::F16)) ? 3 : 0; - if (dstDescPtr->dataType == RpptDataType::U8) - { - if (testCase == 87) - print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); - else - print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); - } - else if (dstDescPtr->dataType == RpptDataType::F16) - { - if (testCase == 87) - print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); - else - print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); - } - else if (dstDescPtr->dataType == RpptDataType::F32) - { - if (testCase == 87) - print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); - else - print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); - } - else if (dstDescPtr->dataType == RpptDataType::I8) - { - if (testCase == 87) - print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); - else - print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); - } - printf("\n"); + printf("\nReduction result (Batch of 1 channel images produces 1 result per image in batch): "); + reductionFuncResultArrLength = srcDescPtr->n; + } - /*Compare the output of the function with golden outputs only if - 1.QA Flag is set - 2.input bit depth 0 (U8) - 3.source and destination layout are the same*/ - if(qaFlag && inputBitDepth == 0 && (srcDescPtr->layout == dstDescPtr->layout) && !(randomOutputCase)) - { - if (testCase == 87) - compare_reduction_output(static_cast(reductionFuncResultArr), testCaseName, srcDescPtr, testCase, dst, scriptPath); - else - compare_reduction_output(static_cast(reductionFuncResultArr), testCaseName, srcDescPtr, testCase, dst, scriptPath); - } + // print reduction functions output array based on different bit depths, and precision desired + int precision = ((dstDescPtr->dataType == RpptDataType::F32) || (dstDescPtr->dataType == RpptDataType::F16)) ? 
3 : 0; + if (dstDescPtr->dataType == RpptDataType::U8) + { + if (testCase == 87) + print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); + else + print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); } - else + else if (dstDescPtr->dataType == RpptDataType::F16) { - // Reconvert other bit depths to 8u for output display purposes - convert_output_bitdepth_to_u8(output, outputu8, inputBitDepth, oBufferSize, outputBufferSize, dstDescPtr, invConversionFactor); + if (testCase == 87) + print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); + else + print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); + } + else if (dstDescPtr->dataType == RpptDataType::F32) + { + if (testCase == 87) + print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); + else + print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); + } + else if (dstDescPtr->dataType == RpptDataType::I8) + { + if (testCase == 87) + print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); + else + print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); + } + printf("\n"); - // If DEBUG_MODE is set to 1 dump the outputs to csv files for debugging - if(DEBUG_MODE && iterCount == 0) - { - std::ofstream refFile; - refFile.open(func + ".csv"); - for (int i = 0; i < oBufferSize; i++) - refFile << static_cast(*(outputu8 + i)) << ","; - refFile.close(); - } + /*Compare the output of the function with golden outputs only if + 1.QA Flag is set + 2.input bit depth 0 (U8) + 3.source and destination layout are the same*/ + if(qaFlag && inputBitDepth == 0 && (srcDescPtr->layout == dstDescPtr->layout) && !(randomOutputCase)) + { + if (testCase == 87) + compare_reduction_output(static_cast(reductionFuncResultArr), testCaseName, srcDescPtr, testCase, dst, scriptPath); + else + compare_reduction_output(static_cast(reductionFuncResultArr), testCaseName, srcDescPtr, testCase, dst, scriptPath); + } + } + else + { + // Reconvert other bit depths to 8u for output display purposes + convert_output_bitdepth_to_u8(output, outputu8, inputBitDepth, oBufferSize, outputBufferSize, dstDescPtr, invConversionFactor); - /*Compare the output of the function with golden outputs only if - 1.QA Flag is set - 2.input bit depth 0 (Input U8 && Output U8) - 3.source and destination layout are the same - 4.augmentation case does not generate random output*/ - if(qaFlag && inputBitDepth == 0 && ((srcDescPtr->layout == dstDescPtr->layout) || pln1OutTypeCase) && !(randomOutputCase)) - compare_output(outputu8, testCaseName, srcDescPtr, dstDescPtr, dstImgSizes, batchSize, interpolationTypeName, noiseTypeName, testCase, dst, scriptPath); - - // Calculate exact dstROI in XYWH format for OpenCV dump - if (roiTypeSrc == RpptRoiType::LTRB) - convert_roi(roiTensorPtrDst, RpptRoiType::XYWH, dstDescPtr->n); - - // Check if the ROI values for each input is within the bounds of the max buffer allocated - RpptROI roiDefault; - RpptROIPtr roiPtrDefault = &roiDefault; - roiPtrDefault->xywhROI = {0, 0, static_cast(dstDescPtr->w), static_cast(dstDescPtr->h)}; - for (int i = 0; i < dstDescPtr->n; i++) - { - roiTensorPtrDst[i].xywhROI.roiWidth = std::min(roiPtrDefault->xywhROI.roiWidth - roiTensorPtrDst[i].xywhROI.xy.x, roiTensorPtrDst[i].xywhROI.roiWidth); - roiTensorPtrDst[i].xywhROI.roiHeight = 
std::min(roiPtrDefault->xywhROI.roiHeight - roiTensorPtrDst[i].xywhROI.xy.y, roiTensorPtrDst[i].xywhROI.roiHeight); - roiTensorPtrDst[i].xywhROI.xy.x = std::max(roiPtrDefault->xywhROI.xy.x, roiTensorPtrDst[i].xywhROI.xy.x); - roiTensorPtrDst[i].xywhROI.xy.y = std::max(roiPtrDefault->xywhROI.xy.y, roiTensorPtrDst[i].xywhROI.xy.y); - } + // If DEBUG_MODE is set to 1 dump the outputs to csv files for debugging + if(DEBUG_MODE && iterCount == 0) + { + std::ofstream refFile; + refFile.open(func + ".csv"); + for (int i = 0; i < oBufferSize; i++) + refFile << static_cast(*(outputu8 + i)) << ","; + refFile.close(); + } - // Convert any PLN3 outputs to the corresponding PKD3 version for OpenCV dump - if (layoutType == 0 || layoutType == 1) - { - if ((dstDescPtr->c == 3) && (dstDescPtr->layout == RpptLayout::NCHW)) - convert_pln3_to_pkd3(outputu8, dstDescPtr); - } + /*Compare the output of the function with golden outputs only if + 1.QA Flag is set + 2.input bit depth 0 (Input U8 && Output U8) + 3.source and destination layout are the same + 4.augmentation case does not generate random output*/ + if(qaFlag && inputBitDepth == 0 && ((srcDescPtr->layout == dstDescPtr->layout) || pln1OutTypeCase) && !(randomOutputCase)) + compare_output(outputu8, testCaseName, srcDescPtr, dstDescPtr, dstImgSizes, batchSize, interpolationTypeName, noiseTypeName, testCase, dst, scriptPath); + + // Calculate exact dstROI in XYWH format for OpenCV dump + if (roiTypeSrc == RpptRoiType::LTRB) + convert_roi(roiTensorPtrDst, RpptRoiType::XYWH, dstDescPtr->n); + + // Check if the ROI values for each input is within the bounds of the max buffer allocated + RpptROI roiDefault; + RpptROIPtr roiPtrDefault = &roiDefault; + roiPtrDefault->xywhROI = {0, 0, static_cast(dstDescPtr->w), static_cast(dstDescPtr->h)}; + for (int i = 0; i < dstDescPtr->n; i++) + { + roiTensorPtrDst[i].xywhROI.roiWidth = std::min(roiPtrDefault->xywhROI.roiWidth - roiTensorPtrDst[i].xywhROI.xy.x, roiTensorPtrDst[i].xywhROI.roiWidth); + roiTensorPtrDst[i].xywhROI.roiHeight = std::min(roiPtrDefault->xywhROI.roiHeight - roiTensorPtrDst[i].xywhROI.xy.y, roiTensorPtrDst[i].xywhROI.roiHeight); + roiTensorPtrDst[i].xywhROI.xy.x = std::max(roiPtrDefault->xywhROI.xy.x, roiTensorPtrDst[i].xywhROI.xy.x); + roiTensorPtrDst[i].xywhROI.xy.y = std::max(roiPtrDefault->xywhROI.xy.y, roiTensorPtrDst[i].xywhROI.xy.y); + } - // OpenCV dump (if testType is unit test and QA mode is not set) - if(!qaFlag) - write_image_batch_opencv(dst, outputu8, dstDescPtr, imageNamesStart, dstImgSizes, MAX_IMAGE_DUMP); + // Convert any PLN3 outputs to the corresponding PKD3 version for OpenCV dump + if (layoutType == 0 || layoutType == 1) + { + if ((dstDescPtr->c == 3) && (dstDescPtr->layout == RpptLayout::NCHW)) + convert_pln3_to_pkd3(outputu8, dstDescPtr); } + + // OpenCV dump (if testType is unit test and QA mode is not set) + if(!qaFlag) + write_image_batch_opencv(dst, outputu8, dstDescPtr, imageNamesStart, dstImgSizes, MAX_IMAGE_DUMP); } } } diff --git a/utilities/test_suite/HOST/Tensor_host_audio.cpp b/utilities/test_suite/HOST/Tensor_host_audio.cpp index fe6fa1246..c7768b9dd 100644 --- a/utilities/test_suite/HOST/Tensor_host_audio.cpp +++ b/utilities/test_suite/HOST/Tensor_host_audio.cpp @@ -132,13 +132,12 @@ int main(int argc, char **argv) double maxWallTime = 0, minWallTime = 500, avgWallTime = 0; string testCaseName; printf("\nRunning %s %d times (each time with a batch size of %d images) and computing mean statistics...", func.c_str(), numRuns, batchSize); - for (int perfRunCount = 0; 
perfRunCount < numRuns; perfRunCount++) + for (int iterCount = 0; iterCount < noOfIterations; iterCount++) { - for (int iterCount = 0; iterCount < noOfIterations; iterCount++) + // read and decode audio and fill the audio dim values + read_audio_batch_and_fill_dims(srcDescPtr, inputf32, audioFilesPath, iterCount, srcLengthTensor, channelsTensor); + for (int perfRunCount = 0; perfRunCount < numRuns; perfRunCount++) { - // read and decode audio and fill the audio dim values - read_audio_batch_and_fill_dims(srcDescPtr, inputf32, audioFilesPath, iterCount, srcLengthTensor, channelsTensor); - double startWallTime, endWallTime; double wallTime; switch (testCase) @@ -234,28 +233,28 @@ int main(int argc, char **argv) maxWallTime = std::max(maxWallTime, wallTime); minWallTime = std::min(minWallTime, wallTime); avgWallTime += wallTime; + } - // QA mode - verify outputs with golden outputs. Below code doesn’t run for performance tests - if (testType == 0) + // QA mode - verify outputs with golden outputs. Below code doesn’t run for performance tests + if (testType == 0) + { + /* Run only if testCase is not 0 + For testCase 0 verify_non_silent_region_detection function is used for QA testing */ + if (testCase != 0) + verify_output(outputf32, dstDescPtr, dstDims, testCaseName, dst, scriptPath); + + /* Dump the outputs to csv files for debugging + Runs only if + 1. DEBUG_MODE is enabled + 2. Current iteration is 1st iteration + 3. Test case is not 0 */ + if (DEBUG_MODE && iterCount == 0 && testCase != 0) { - /* Run only if testCase is not 0 - For testCase 0 verify_non_silent_region_detection function is used for QA testing */ - if (testCase != 0) - verify_output(outputf32, dstDescPtr, dstDims, testCaseName, dst, scriptPath); - - /* Dump the outputs to csv files for debugging - Runs only if - 1. DEBUG_MODE is enabled - 2. Current iteration is 1st iteration - 3. 
Test case is not 0 */ - if (DEBUG_MODE && iterCount == 0 && testCase != 0) - { - std::ofstream refFile; - refFile.open(func + ".csv"); - for (int i = 0; i < oBufferSize; i++) - refFile << *(outputf32 + i) << "\n"; - refFile.close(); - } + std::ofstream refFile; + refFile.open(func + ".csv"); + for (int i = 0; i < oBufferSize; i++) + refFile << *(outputf32 + i) << "\n"; + refFile.close(); } } } diff --git a/utilities/test_suite/HOST/Tensor_voxel_host.cpp b/utilities/test_suite/HOST/Tensor_voxel_host.cpp index 0198c3ca0..260ccbe3a 100644 --- a/utilities/test_suite/HOST/Tensor_voxel_host.cpp +++ b/utilities/test_suite/HOST/Tensor_voxel_host.cpp @@ -148,8 +148,7 @@ int main(int argc, char * argv[]) // Run case-wise RPP API and measure time int missingFuncFlag = 0; - double startWallTime, endWallTime, wallTime; - double maxWallTime = 0, minWallTime = 5000, avgWallTime = 0; + double maxWallTime = 0, minWallTime = 5000, avgWallTime = 0, wallTime = 0; int noOfIterations = (int)noOfFiles / batchSize; string testCaseName; @@ -163,59 +162,60 @@ int main(int argc, char * argv[]) } printf("\nRunning %s %d times (each time with a batch size of %d images) and computing mean statistics...", funcName.c_str(), numRuns, batchSize); - for (int perfRunCount = 0; perfRunCount < numRuns; perfRunCount++) + for(int iterCount = 0; iterCount < noOfIterations; iterCount++) { - for(int iterCount = 0; iterCount < noOfIterations; iterCount++) - { - vector::const_iterator dataFilePathStart = dataFilePath.begin() + (iterCount * batchSize); - vector::const_iterator dataFilePathEnd = dataFilePathStart + batchSize; - nifti_1_header *niftiHeaderTemp = niftiHeader + batchSize * iterCount; + vector::const_iterator dataFilePathStart = dataFilePath.begin() + (iterCount * batchSize); + vector::const_iterator dataFilePathEnd = dataFilePathStart + batchSize; + nifti_1_header *niftiHeaderTemp = niftiHeader + batchSize * iterCount; - read_nifti_data(dataFilePathStart, dataFilePathEnd, niftiDataArray, niftiHeaderTemp); + read_nifti_data(dataFilePathStart, dataFilePathEnd, niftiDataArray, niftiHeaderTemp); - // optionally pick full image as ROI or a smaller slice of the 3D tensor in X/Y/Z dimensions - for(int i = 0; i < batchSize; i++) - { - // option 1 - test using roi as the whole 3D image - not sliced (example for 240 x 240 x 155 x 1) - roiGenericSrcPtr[i].xyzwhdROI.xyz.x = 0; // start X dim = 0 - roiGenericSrcPtr[i].xyzwhdROI.xyz.y = 0; // start Y dim = 0 - roiGenericSrcPtr[i].xyzwhdROI.xyz.z = 0; // start Z dim = 0 - roiGenericSrcPtr[i].xyzwhdROI.roiWidth = niftiHeaderTemp[i].dim[1]; // length in X dim - roiGenericSrcPtr[i].xyzwhdROI.roiHeight = niftiHeaderTemp[i].dim[2]; // length in Y dim - roiGenericSrcPtr[i].xyzwhdROI.roiDepth = niftiHeaderTemp[i].dim[3]; // length in Z dim - // option 2 - test using roi as a smaller 3D tensor slice - sliced in X, Y and Z dims (example for 240 x 240 x 155 x 1) - // roiGenericSrcPtr[i].xyzwhdROI.xyz.x = niftiHeader.dim[1] / 4; // start X dim = 60 - // roiGenericSrcPtr[i].xyzwhdROI.xyz.y = niftiHeader[i].dim[2] / 4; // start Y dim = 60 - // roiGenericSrcPtr[i].xyzwhdROI.xyz.z = niftiHeader[i].dim[3] / 3; // start Z dim = 51 - // roiGenericSrcPtr[i].xyzwhdROI.roiWidth = niftiHeader[i].dim[1] / 2; // length in X dim = 120 - // roiGenericSrcPtr[i].xyzwhdROI.roiHeight = niftiHeader[i].dim[2] / 2; // length in Y dim = 120 - // roiGenericSrcPtr[i].xyzwhdROI.roiDepth = niftiHeader[i].dim[3] / 3; // length in Z dim = 51 - // option 3 - test using roi as a smaller 3D tensor slice - sliced in only Z dim 
(example for 240 x 240 x 155 x 1) - // roiGenericSrcPtr[i].xyzwhdROI.xyz.x = 0; // start X dim = 0 - // roiGenericSrcPtr[i].xyzwhdROI.xyz.y = 0; // start Y dim = 0 - // roiGenericSrcPtr[i].xyzwhdROI.xyz.z = niftiHeader[i].dim[3] / 3; // start Z dim = 51 - // roiGenericSrcPtr[i].xyzwhdROI.roiWidth = niftiHeader[i].dim[1]; // length in X dim = 240 - // roiGenericSrcPtr[i].xyzwhdROI.roiHeight = niftiHeader[i].dim[2]; // length in Y dim = 240 - // roiGenericSrcPtr[i].xyzwhdROI.roiDepth = niftiHeader[i].dim[3] / 3; // length in Z dim = 51 - // option 4 - test using roi as a smaller 3D tensor slice - sliced in only X and Z dim (example for 240 x 240 x 155 x 1) - // roiGenericSrcPtr[i].xyzwhdROI.xyz.x = niftiHeader[i].dim[1] / 5; // start X dim = 48 - // roiGenericSrcPtr[i].xyzwhdROI.xyz.y = 0; // start Y dim = 0 - // roiGenericSrcPtr[i].xyzwhdROI.xyz.z = niftiHeader[i].dim[3] / 3; // start Z dim = 51 - // roiGenericSrcPtr[i].xyzwhdROI.roiWidth = niftiHeader[i].dim[1] * 3 / 5; // length in X dim = 144 - // roiGenericSrcPtr[i].xyzwhdROI.roiHeight = niftiHeader[i].dim[2]; // length in Y dim = 240 - // roiGenericSrcPtr[i].xyzwhdROI.roiDepth = niftiHeader[i].dim[3] / 3; // length in Z dim = 51 - } + // optionally pick full image as ROI or a smaller slice of the 3D tensor in X/Y/Z dimensions + for(int i = 0; i < batchSize; i++) + { + // option 1 - test using roi as the whole 3D image - not sliced (example for 240 x 240 x 155 x 1) + roiGenericSrcPtr[i].xyzwhdROI.xyz.x = 0; // start X dim = 0 + roiGenericSrcPtr[i].xyzwhdROI.xyz.y = 0; // start Y dim = 0 + roiGenericSrcPtr[i].xyzwhdROI.xyz.z = 0; // start Z dim = 0 + roiGenericSrcPtr[i].xyzwhdROI.roiWidth = niftiHeaderTemp[i].dim[1]; // length in X dim + roiGenericSrcPtr[i].xyzwhdROI.roiHeight = niftiHeaderTemp[i].dim[2]; // length in Y dim + roiGenericSrcPtr[i].xyzwhdROI.roiDepth = niftiHeaderTemp[i].dim[3]; // length in Z dim + // option 2 - test using roi as a smaller 3D tensor slice - sliced in X, Y and Z dims (example for 240 x 240 x 155 x 1) + // roiGenericSrcPtr[i].xyzwhdROI.xyz.x = niftiHeader.dim[1] / 4; // start X dim = 60 + // roiGenericSrcPtr[i].xyzwhdROI.xyz.y = niftiHeader[i].dim[2] / 4; // start Y dim = 60 + // roiGenericSrcPtr[i].xyzwhdROI.xyz.z = niftiHeader[i].dim[3] / 3; // start Z dim = 51 + // roiGenericSrcPtr[i].xyzwhdROI.roiWidth = niftiHeader[i].dim[1] / 2; // length in X dim = 120 + // roiGenericSrcPtr[i].xyzwhdROI.roiHeight = niftiHeader[i].dim[2] / 2; // length in Y dim = 120 + // roiGenericSrcPtr[i].xyzwhdROI.roiDepth = niftiHeader[i].dim[3] / 3; // length in Z dim = 51 + // option 3 - test using roi as a smaller 3D tensor slice - sliced in only Z dim (example for 240 x 240 x 155 x 1) + // roiGenericSrcPtr[i].xyzwhdROI.xyz.x = 0; // start X dim = 0 + // roiGenericSrcPtr[i].xyzwhdROI.xyz.y = 0; // start Y dim = 0 + // roiGenericSrcPtr[i].xyzwhdROI.xyz.z = niftiHeader[i].dim[3] / 3; // start Z dim = 51 + // roiGenericSrcPtr[i].xyzwhdROI.roiWidth = niftiHeader[i].dim[1]; // length in X dim = 240 + // roiGenericSrcPtr[i].xyzwhdROI.roiHeight = niftiHeader[i].dim[2]; // length in Y dim = 240 + // roiGenericSrcPtr[i].xyzwhdROI.roiDepth = niftiHeader[i].dim[3] / 3; // length in Z dim = 51 + // option 4 - test using roi as a smaller 3D tensor slice - sliced in only X and Z dim (example for 240 x 240 x 155 x 1) + // roiGenericSrcPtr[i].xyzwhdROI.xyz.x = niftiHeader[i].dim[1] / 5; // start X dim = 48 + // roiGenericSrcPtr[i].xyzwhdROI.xyz.y = 0; // start Y dim = 0 + // roiGenericSrcPtr[i].xyzwhdROI.xyz.z = niftiHeader[i].dim[3] / 3; // 
start Z dim = 51 + // roiGenericSrcPtr[i].xyzwhdROI.roiWidth = niftiHeader[i].dim[1] * 3 / 5; // length in X dim = 144 + // roiGenericSrcPtr[i].xyzwhdROI.roiHeight = niftiHeader[i].dim[2]; // length in Y dim = 240 + // roiGenericSrcPtr[i].xyzwhdROI.roiDepth = niftiHeader[i].dim[3] / 3; // length in Z dim = 51 + } - // Convert default NIFTI_DATATYPE unstrided buffer to RpptDataType::F32 strided buffer - convert_input_niftitype_to_Rpp32f_generic(niftiDataArray, niftiHeaderTemp, inputF32 , descriptorPtr3D); + // Convert default NIFTI_DATATYPE unstrided buffer to RpptDataType::F32 strided buffer + convert_input_niftitype_to_Rpp32f_generic(niftiDataArray, niftiHeaderTemp, inputF32 , descriptorPtr3D); - // Typecast input from F32 to U8 if input bitdepth requested is U8 - if (inputBitDepth == 0) - { - for(int i = 0; i < iBufferSizeU8; i++) - inputU8[i] = std::min(std::max(static_cast(inputF32[i]), static_cast(0)), static_cast(255)); - } + // Typecast input from F32 to U8 if input bitdepth requested is U8 + if (inputBitDepth == 0) + { + for(int i = 0; i < iBufferSizeU8; i++) + inputU8[i] = std::min(std::max(static_cast(inputF32[i]), static_cast(0)), static_cast(255)); + } + for (int perfRunCount = 0; perfRunCount < numRuns; perfRunCount++) + { + double startWallTime, endWallTime; switch (testCase) { case 0: @@ -311,106 +311,107 @@ int main(int argc, char * argv[]) maxWallTime = std::max(maxWallTime, wallTime); minWallTime = std::min(minWallTime, wallTime); avgWallTime += wallTime; - wallTime *= 1000; + if (missingFuncFlag == 1) { printf("\nThe functionality doesn't yet exist in RPP\n"); return -1; } + } - if(testType == 0) + wallTime *= 1000; + if(testType == 0) + { + cout << "\n\nCPU Backend Wall Time: " << wallTime <<" ms per batch"<< endl; + if(DEBUG_MODE) { - cout << "\n\nCPU Backend Wall Time: " << wallTime <<" ms per nifti file"<< endl; - if(DEBUG_MODE) - { - std::ofstream refFile; - std::string refFileName; - if(layoutType == 0) - refFileName = testCaseName + "_nifti_host_pkd3.csv"; - else if(layoutType == 1) - refFileName = testCaseName + "_nifti_host_pln3.csv"; - else - refFileName = testCaseName + "_nifti_host_pln1.csv"; - refFile.open(refFileName); - for (int i = 0; i < oBufferSize; i++) - refFile << *(outputF32 + i) << ","; - refFile.close(); - } + std::ofstream refFile; + std::string refFileName; + if(layoutType == 0) + refFileName = testCaseName + "_nifti_host_pkd3.csv"; + else if(layoutType == 1) + refFileName = testCaseName + "_nifti_host_pln3.csv"; + else + refFileName = testCaseName + "_nifti_host_pln1.csv"; + refFile.open(refFileName); + for (int i = 0; i < oBufferSize; i++) + refFile << *(outputF32 + i) << ","; + refFile.close(); + } - if(inputBitDepth == 0) - { - Rpp64u bufferLength = iBufferSize * sizeof(Rpp8u) + descriptorPtr3D->offsetInBytes; + if(inputBitDepth == 0) + { + Rpp64u bufferLength = iBufferSize * sizeof(Rpp8u) + descriptorPtr3D->offsetInBytes; - // Copy U8 buffer to F32 buffer for display purposes - for(int i = 0; i < bufferLength; i++) - outputF32[i] = static_cast(outputU8[i]); - } + // Copy U8 buffer to F32 buffer for display purposes + for(int i = 0; i < bufferLength; i++) + outputF32[i] = static_cast(outputU8[i]); + } - /*Compare the output of the function with golden outputs only if - 1.QA Flag is set - 2.input bit depth 2 (F32)*/ - if(qaFlag && inputBitDepth == 2) - compare_output(outputF32, oBufferSize, testCaseName, layoutType, descriptorPtr3D, (RpptRoiXyzwhd *)roiGenericSrcPtr, dstPath, scriptPath); - else + /*Compare the output of the function with 
golden outputs only if + 1.QA Flag is set + 2.input bit depth 2 (F32)*/ + if(qaFlag && inputBitDepth == 2) + compare_output(outputF32, oBufferSize, testCaseName, layoutType, descriptorPtr3D, (RpptRoiXyzwhd *)roiGenericSrcPtr, dstPath, scriptPath); + else + { + for(int batchCount = 0; batchCount < batchSize; batchCount++) { - for(int batchCount = 0; batchCount < batchSize; batchCount++) + int index = iterCount * batchSize + batchCount; + Rpp32f *outputTemp = outputF32 + batchCount * descriptorPtr3D->strides[0]; + for(int i = 0; i < numChannels; i++) // temporary changes to process pln3 { - int index = iterCount * batchSize + batchCount; - Rpp32f *outputTemp = outputF32 + batchCount * descriptorPtr3D->strides[0]; - for(int i = 0; i < numChannels; i++) // temporary changes to process pln3 + int xyFrameSize = niftiHeaderTemp[batchCount].dim[1] * niftiHeaderTemp[batchCount].dim[2]; + int xyFrameSizeROI = roiGenericSrcPtr[batchCount].xyzwhdROI.roiWidth * roiGenericSrcPtr[batchCount].xyzwhdROI.roiHeight; + + uint dataSize = niftiHeaderTemp[batchCount].dim[1] * niftiHeaderTemp[batchCount].dim[2] * niftiHeaderTemp[batchCount].dim[3]; + uchar *niftiDataU8 = (uchar *) malloc(dataSize * sizeof(uchar)); + uchar *outputBufferOpenCV = (uchar *)calloc(xyFrameSizeROI, sizeof(uchar)); + + // Convert RpptDataType::F32 strided buffer to default NIFTI_DATATYPE unstrided buffer + Rpp64u increment; + if (descriptorPtr3D->layout == RpptLayout::NCDHW) + increment = (Rpp64u)descriptorPtr3D->strides[1]; + else + increment = 1; + convert_output_Rpp32f_to_niftitype_generic(outputTemp + i * increment, descriptorPtr3D, niftiDataArray[batchCount], &niftiHeaderTemp[batchCount]); + NIFTI_DATATYPE min = niftiDataArray[batchCount][0]; + NIFTI_DATATYPE max = niftiDataArray[batchCount][0]; + for (int i = 0; i < dataSize; i++) + { + min = std::min(min, niftiDataArray[batchCount][i]); + max = std::max(max, niftiDataArray[batchCount][i]); + } + Rpp32f multiplier = 255.0f / (max - min); + for (int i = 0; i < dataSize; i++) + niftiDataU8[i] = (uchar)((niftiDataArray[batchCount][i] - min) * multiplier); + + uchar *niftiDataU8Temp = niftiDataU8; + for (int zPlane = roiGenericSrcPtr[batchCount].xyzwhdROI.xyz.z; zPlane < roiGenericSrcPtr[batchCount].xyzwhdROI.xyz.z + roiGenericSrcPtr[batchCount].xyzwhdROI.roiDepth; zPlane++) + { + write_image_from_nifti_opencv(niftiDataU8Temp, niftiHeaderTemp[batchCount].dim[1], (RpptRoiXyzwhd *)roiGenericSrcPtr, outputBufferOpenCV, zPlane, i, batchCount, dstPath, testCaseName, index); + niftiDataU8Temp += xyFrameSize; + } + + write_nifti_file(&niftiHeaderTemp[batchCount], niftiDataArray[batchCount], index, i, dstPath, testCaseName); + + if(i == 0) + { + std::string command = "convert -delay 10 -loop 0 " + std::string(dstPath) + "/" + testCaseName + "_nifti_" + std::to_string(index) + "_zPlane_chn_0_*.jpg " + std::string(dstPath) + "/" + testCaseName + "_niftiOutput_" + std::to_string(index) + "_chn_" + std::to_string(i) + ".gif"; + system(command.c_str()); + } + if(i == 1) + { + std::string command = "convert -delay 10 -loop 0 " + std::string(dstPath) + "/" + testCaseName + "_nifti_" + std::to_string(index) + "_zPlane_chn_1_*.jpg " + std::string(dstPath) + "/" + testCaseName + "_niftiOutput_" + std::to_string(index) + "_chn_" + std::to_string(i) + ".gif"; + system(command.c_str()); + } + if(i == 2) { - int xyFrameSize = niftiHeaderTemp[batchCount].dim[1] * niftiHeaderTemp[batchCount].dim[2]; - int xyFrameSizeROI = roiGenericSrcPtr[batchCount].xyzwhdROI.roiWidth * 
roiGenericSrcPtr[batchCount].xyzwhdROI.roiHeight; - - uint dataSize = niftiHeaderTemp[batchCount].dim[1] * niftiHeaderTemp[batchCount].dim[2] * niftiHeaderTemp[batchCount].dim[3]; - uchar *niftiDataU8 = (uchar *) malloc(dataSize * sizeof(uchar)); - uchar *outputBufferOpenCV = (uchar *)calloc(xyFrameSizeROI, sizeof(uchar)); - - // Convert RpptDataType::F32 strided buffer to default NIFTI_DATATYPE unstrided buffer - Rpp64u increment; - if (descriptorPtr3D->layout == RpptLayout::NCDHW) - increment = (Rpp64u)descriptorPtr3D->strides[1]; - else - increment = 1; - convert_output_Rpp32f_to_niftitype_generic(outputTemp + i * increment, descriptorPtr3D, niftiDataArray[batchCount], &niftiHeaderTemp[batchCount]); - NIFTI_DATATYPE min = niftiDataArray[batchCount][0]; - NIFTI_DATATYPE max = niftiDataArray[batchCount][0]; - for (int i = 0; i < dataSize; i++) - { - min = std::min(min, niftiDataArray[batchCount][i]); - max = std::max(max, niftiDataArray[batchCount][i]); - } - Rpp32f multiplier = 255.0f / (max - min); - for (int i = 0; i < dataSize; i++) - niftiDataU8[i] = (uchar)((niftiDataArray[batchCount][i] - min) * multiplier); - - uchar *niftiDataU8Temp = niftiDataU8; - for (int zPlane = roiGenericSrcPtr[batchCount].xyzwhdROI.xyz.z; zPlane < roiGenericSrcPtr[batchCount].xyzwhdROI.xyz.z + roiGenericSrcPtr[batchCount].xyzwhdROI.roiDepth; zPlane++) - { - write_image_from_nifti_opencv(niftiDataU8Temp, niftiHeaderTemp[batchCount].dim[1], (RpptRoiXyzwhd *)roiGenericSrcPtr, outputBufferOpenCV, zPlane, i, batchCount, dstPath, testCaseName, index); - niftiDataU8Temp += xyFrameSize; - } - - write_nifti_file(&niftiHeaderTemp[batchCount], niftiDataArray[batchCount], index, i, dstPath, testCaseName); - - if(i == 0) - { - std::string command = "convert -delay 10 -loop 0 " + std::string(dstPath) + "/" + testCaseName + "_nifti_" + std::to_string(index) + "_zPlane_chn_0_*.jpg " + std::string(dstPath) + "/" + testCaseName + "_niftiOutput_" + std::to_string(index) + "_chn_" + std::to_string(i) + ".gif"; - system(command.c_str()); - } - if(i == 1) - { - std::string command = "convert -delay 10 -loop 0 " + std::string(dstPath) + "/" + testCaseName + "_nifti_" + std::to_string(index) + "_zPlane_chn_1_*.jpg " + std::string(dstPath) + "/" + testCaseName + "_niftiOutput_" + std::to_string(index) + "_chn_" + std::to_string(i) + ".gif"; - system(command.c_str()); - } - if(i == 2) - { - std::string command = "convert -delay 10 -loop 0 " + std::string(dstPath) + "/" + testCaseName + "_nifti_" + std::to_string(index) + "_zPlane_chn_2_*.jpg " + std::string(dstPath) + "/" + testCaseName + "_niftiOutput_" + std::to_string(index) + "_chn_" + std::to_string(i) + ".gif"; - system(command.c_str()); - } - free(niftiDataU8); - free(outputBufferOpenCV); + std::string command = "convert -delay 10 -loop 0 " + std::string(dstPath) + "/" + testCaseName + "_nifti_" + std::to_string(index) + "_zPlane_chn_2_*.jpg " + std::string(dstPath) + "/" + testCaseName + "_niftiOutput_" + std::to_string(index) + "_chn_" + std::to_string(i) + ".gif"; + system(command.c_str()); } + free(niftiDataU8); + free(outputBufferOpenCV); } } } diff --git a/utilities/test_suite/HOST/runAudioTests.py b/utilities/test_suite/HOST/runAudioTests.py index 70ec00026..db54cf53e 100644 --- a/utilities/test_suite/HOST/runAudioTests.py +++ b/utilities/test_suite/HOST/runAudioTests.py @@ -23,11 +23,10 @@ """ import os -import subprocess # nosec -import argparse import sys -import datetime -import shutil +sys.dont_write_bytecode = True +sys.path.append(os.path.join(os.path.dirname( 
__file__ ), '..' )) +from common import * # Set the timestamp timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") @@ -39,56 +38,6 @@ caseMin = 0 caseMax = 3 -# Checks if the folder path is empty, or is it a root folder, or if it exists, and remove its contents -def validate_and_remove_files(path): - if not path: # check if a string is empty - print("Folder path is empty.") - exit(0) - - elif path == "/*": # check if the root directory is passed to the function - print("Root folder cannot be deleted.") - exit(0) - - elif os.path.exists(path): # check if the folder exists - # Get a list of files and directories within the specified path - items = os.listdir(path) - - if items: - # The directory is not empty, delete its contents - for item in items: - item_path = os.path.join(path, item) - if os.path.isfile(item_path): - os.remove(item_path) - elif os.path.isdir(item_path): - shutil.rmtree(item_path) # Delete the directory if it exists - - else: - print("Path is invalid or does not exist.") - exit(0) - -# Check if the folder is the root folder or exists, and remove the specified subfolders -def validate_and_remove_folders(path, folder): - if path == "/*": # check if the root directory is passed to the function - print("Root folder cannot be deleted.") - exit(0) - if path and os.path.isdir(path): # checks if directory string is not empty and it exists - output_folders = [folder_name for folder_name in os.listdir(path) if folder_name.startswith(folder)] - - # Loop through each directory and delete it only if it exists - for folder_name in output_folders: - folder_path = os.path.join(path, folder_name) - if os.path.isdir(folder_path): - shutil.rmtree(folder_path) # Delete the directory if it exists - print("Deleted directory:", folder_path) - else: - print("Directory not found:", folder_path) - -# Validate if a path exists and is a directory -def validate_path(input_path): - if not os.path.exists(input_path): - raise ValueError("path " + input_path +" does not exist.") - if not os.path.isdir(input_path): - raise ValueError("path " + input_path + " is not a directory.") # Get a list of log files based on a flag for preserving output def get_log_file_list(): @@ -229,6 +178,9 @@ def rpp_test_suite_parser_and_validator(): subprocess.run(["cmake", scriptPath], cwd=".") # nosec subprocess.run(["make", "-j16"], cwd=".") # nosec +# List of cases supported +supportedCaseList = ['0', '1', '2', '3'] + if testType == 0: if batchSize != 3: print("QA tests can only run with a batch size of 3.") @@ -240,8 +192,7 @@ def rpp_test_suite_parser_and_validator(): srcPath = scriptPath + "/../TEST_AUDIO_FILES/three_sample_multi_channel_src1" else: srcPath = inFilePath - if int(case) < 0 or int(case) > 3: - print(f"Invalid case number {case}. Case number must be 0-3 range!") + if case not in supportedCaseList: continue run_unit_test(srcPath, case, numRuns, testType, batchSize, outFilePath) @@ -252,88 +203,24 @@ def rpp_test_suite_parser_and_validator(): srcPath = scriptPath + "/../TEST_AUDIO_FILES/three_sample_multi_channel_src1" else: srcPath = inFilePath - if int(case) < 0 or int(case) > 3: - print(f"Invalid case number {case}. 
Case number must be 0-3 range!") + if case not in supportedCaseList: continue run_performance_test(loggingFolder, srcPath, case, numRuns, testType, batchSize, outFilePath) # print the results of qa tests -supportedCaseList = ['0', '1', '2', '3'] nonQACaseList = [] # Add cases present in supportedCaseList, but without QA support if testType == 0: qaFilePath = os.path.join(outFilePath, "QA_results.txt") checkFile = os.path.isfile(qaFilePath) if checkFile: - f = open(qaFilePath, 'r+') print("---------------------------------- Results of QA Test - Tensor_host_audio -----------------------------------\n") - numLines = 0 - numPassed = 0 - for line in f: - sys.stdout.write(line) - numLines += 1 - if "PASSED" in line: - numPassed += 1 - sys.stdout.flush() - resultsInfo = "\n\nFinal Results of Tests:" - resultsInfo += "\n - Total test cases including all subvariants REQUESTED = " + str(numLines) - resultsInfo += "\n - Total test cases including all subvariants PASSED = " + str(numPassed) - resultsInfo += "\n\nGeneral information on Tensor test suite availability:" - resultsInfo += "\n - Total augmentations supported in Tensor audio test suite = " + str(len(supportedCaseList)) - resultsInfo += "\n - Total augmentations with golden output QA test support = " + str(len(supportedCaseList) - len(nonQACaseList)) - resultsInfo += "\n - Total augmentations without golden ouput QA test support (due to randomization involved) = " + str(len(nonQACaseList)) - f.write(resultsInfo) - print("\n-------------------------------------------------------------------" + resultsInfo + "\n\n-------------------------------------------------------------------") + print_qa_tests_summary(qaFilePath, supportedCaseList, nonQACaseList) # Performance tests if (testType == 1): log_file_list = get_log_file_list() + for log_file in log_file_list: + print_performance_tests_summary(log_file, "", numRuns) - try: - f = open(log_file_list[0], "r") - print("\n\n\nOpened log file -> "+ log_file_list[0]) - except IOError: - print("Skipping file -> "+ log_file_list[0]) - exit(0) - - # Initialize data structures to store the parsed data - functions = [] - max_wall_times = [] - min_wall_times = [] - avg_wall_times = [] - prev_line = "" - funcCount = 0 - - for line in f: - if "max,min,avg wall times in ms/batch" in line: - split_word_start = "Running " - split_word_end = " " + str(numRuns) - prev_line = prev_line.partition(split_word_start)[2].partition(split_word_end)[0] - if prev_line not in functions: - functions.append(prev_line) - split_word_start = "max,min,avg wall times in ms/batch = " - split_word_end = "\n" - stats = line.partition(split_word_start)[2].partition(split_word_end)[0].split(",") - max_wall_times.append(float(stats[0])) - min_wall_times.append(float(stats[1])) - avg_wall_times.append(float(stats[2])) - funcCount += 1 - - if line != "\n": - prev_line = line - - # Print log lengths - print("Functionalities - "+ str(funcCount)) - - # Print the summary in a well-formatted table - print("\n\nFunctionality\t\t\t\t\t\tnumRuns\t\tmax(ms/batch)\t\tmin(ms/batch)\t\tavg(ms/batch)\n") - - if len(functions) > 0: - max_func_length = max(len(func) for func in functions) - - for i, func in enumerate(functions): - print("{func}\t\t\t\t{numRuns}\t{:<15.6f}\t{:<15.6f}\t{:<15.6f}".format( - max_wall_times[i], min_wall_times[i], avg_wall_times[i], func=func, numRuns=numRuns)) - else: - print("No functionality data found in the log file.") \ No newline at end of file diff --git a/utilities/test_suite/HOST/runTests.py 
b/utilities/test_suite/HOST/runTests.py index b08c4d5e8..9aa92d9c4 100644 --- a/utilities/test_suite/HOST/runTests.py +++ b/utilities/test_suite/HOST/runTests.py @@ -23,12 +23,10 @@ """ import os -import subprocess # nosec -import argparse import sys -import datetime -import shutil -import pandas as pd +sys.dont_write_bytecode = True +sys.path.append(os.path.join(os.path.dirname( __file__ ), '..' )) +from common import * # Set the timestamp timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") @@ -41,69 +39,8 @@ perfQaInputFile = scriptPath + "/../TEST_IMAGES/eight_images_mixed_src1" outFolderPath = os.getcwd() buildFolderPath = os.getcwd() - -# Checks if the folder path is empty, or is it a root folder, or if it exists, and remove its contents -def validate_and_remove_files(path): - if not path: # check if a string is empty - print("Folder path is empty.") - exit() - - elif path == "/*": # check if the root directory is passed to the function - print("Root folder cannot be deleted.") - exit() - - elif os.path.exists(path): # check if the folder exists - # Get a list of files and directories within the specified path - items = os.listdir(path) - - if items: - # The directory is not empty, delete its contents - for item in items: - item_path = os.path.join(path, item) - if os.path.isfile(item_path): - os.remove(item_path) - elif os.path.isdir(item_path): - shutil.rmtree(item_path) # Delete the directory if it exists - - else: - print("Path is invalid or does not exist.") - exit() - -# Check if the folder is the root folder or exists, and remove the specified subfolders -def validate_and_remove_folders(path, folder): - if path == "/*": # check if the root directory is passed to the function - print("Root folder cannot be deleted.") - exit() - if path and os.path.isdir(path): # checks if directory string is not empty and it exists - output_folders = [folder_name for folder_name in os.listdir(path) if folder_name.startswith(folder)] - - # Loop through each directory and delete it only if it exists - for folder_name in output_folders: - folder_path = os.path.join(path, folder_name) - if os.path.isdir(folder_path): - shutil.rmtree(folder_path) # Delete the directory if it exists - print("Deleted directory:", folder_path) - else: - print("Directory not found:", folder_path) - -# Create layout directories within a destination path based on a layout dictionary -def create_layout_directories(dst_path, layout_dict): - for layout in range(3): - current_layout = layout_dict[layout] - try: - os.makedirs(dst_path + '/' + current_layout) - except FileExistsError: - pass - folder_list = [f for f in os.listdir(dst_path) if current_layout.lower() in f] - for folder in folder_list: - os.rename(dst_path + '/' + folder, dst_path + '/' + current_layout + '/' + folder) - -# Validate if a path exists and is a directory -def validate_path(input_path): - if not os.path.exists(input_path): - raise ValueError("path " + input_path +" does not exist.") - if not os.path.isdir(input_path): - raise ValueError("path " + input_path + " is not a directory.") +caseMin = 0 +caseMax = 89 # Get a list of log files based on a flag for preserving output def get_log_file_list(preserveOutput): @@ -130,43 +67,20 @@ def func_group_finder(case_number): else: return "miscellaneous" -# Generate a directory name based on certain parameters -def directory_name_generator(qaMode, affinity, layoutType, case, path): - if qaMode == 0: - functionality_group = func_group_finder(int(case)) - dst_folder_temp = 
"{}/rpp_{}_{}_{}".format(path, affinity, layoutType, functionality_group) - else: - dst_folder_temp = path - - return dst_folder_temp - -# Process the layout based on the given parameters and generate the directory name and log file layout. -def process_layout(layout, qaMode, case, dstPath): - if layout == 0: - dstPathTemp = directory_name_generator(qaMode, "host", "pkd3", case, dstPath) - log_file_layout = "pkd3" - elif layout == 1: - dstPathTemp = directory_name_generator(qaMode, "host", "pln3", case, dstPath) - log_file_layout = "pln3" - elif layout == 2: - dstPathTemp = directory_name_generator(qaMode, "host", "pln1", case, dstPath) - log_file_layout = "pln1" - - return dstPathTemp, log_file_layout - def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layout, qaMode, decoderType, batchSize, roiList): print("\n\n\n\n") print("--------------------------------") print("Running a New Functionality...") print("--------------------------------") + bitDepths = range(7) + outputFormatToggles = [0, 1] if qaMode: - maxBitdepth = 1 - else: - maxBitdepth = 7 - for bitDepth in range(maxBitdepth): + bitDepths = [0] + outputFormatToggles = [0] + for bitDepth in bitDepths: print("\n\n\nRunning New Bit Depth...\n-------------------------\n\n") - for outputFormatToggle in range(2): + for outputFormatToggle in outputFormatToggles: # There is no layout toggle for PLN1 case, so skip this case if layout == 2 and outputFormatToggle == 1: continue @@ -216,11 +130,10 @@ def run_performance_test(loggingFolder, log_file_layout, srcPath1, srcPath2, dst print("--------------------------------") print("Running a New Functionality...") print("--------------------------------") + bitDepths = range(7) if qaMode: - maxBitdepth = 1 - else: - maxBitdepth = 7 - for bitDepth in range(maxBitdepth): + bitDepths = [0] + for bitDepth in bitDepths: print("\n\n\nRunning New Bit Depth...\n-------------------------\n\n") for outputFormatToggle in range(2): @@ -244,8 +157,8 @@ def rpp_test_suite_parser_and_validator(): parser = argparse.ArgumentParser() parser.add_argument("--input_path1", type = str, default = inFilePath1, help = "Path to the input folder 1") parser.add_argument("--input_path2", type = str, default = inFilePath2, help = "Path to the input folder 2") - parser.add_argument("--case_start", type = int, default = 0, help = "Testing range starting case # - (0:89)") - parser.add_argument("--case_end", type = int, default = 89, help = "Testing range ending case # - (0:89)") + parser.add_argument("--case_start", type = int, default = caseMin, help = "Testing start case # - Range must be in [" + str(caseMin) + ":" + str(caseMax) + "]") + parser.add_argument("--case_end", type = int, default = caseMax, help = "Testing end case # - Range must be in [" + str(caseMin) + ":" + str(caseMax) + "]") parser.add_argument('--test_type', type = int, default = 0, help = "Type of Test - (0 = Unit tests / 1 = Performance tests)") parser.add_argument('--case_list', nargs = "+", help = "List of case numbers to list", required = False) parser.add_argument('--qa_mode', type = int, default = 0, help = "Run with qa_mode? Output images from tests will be compared with golden outputs - (0 / 1)", required = False) @@ -263,8 +176,8 @@ def rpp_test_suite_parser_and_validator(): validate_path(perfQaInputFile) # validate the parameters passed by user - if ((args.case_start < 0 or args.case_start > 89) or (args.case_end < 0 or args.case_end > 89)): - print("Starting case# and Ending case# must be in the 0:89 range. 
Aborting!") + if ((args.case_start < caseMin or args.case_start > caseMax) or (args.case_end < caseMin or args.case_end > caseMax)): + print(f"Starting case# and Ending case# must be in the {caseMin}:{caseMax} range. Aborting!") exit(0) elif args.case_end < args.case_start: print("Ending case# must be greater than starting case#. Aborting!") @@ -278,7 +191,7 @@ def rpp_test_suite_parser_and_validator(): elif args.decoder_type < 0 or args.decoder_type > 1: print("Decoder Type must be in the 0/1 (0 = OpenCV / 1 = TurboJPEG). Aborting") exit(0) - elif args.case_list is not None and args.case_start > 0 and args.case_end < 89: + elif args.case_list is not None and args.case_start > caseMin and args.case_end < caseMax: print("Invalid input! Please provide only 1 option between case_list, case_start and case_end") exit(0) elif args.num_runs <= 0: @@ -302,9 +215,9 @@ def rpp_test_suite_parser_and_validator(): args.case_list = [str(x) for x in args.case_list] else: for case in args.case_list: - if int(case) < 0 or int(case) > 89: - print("The case# must be in the 0:89 range!") - exit(0) + if int(case) < caseMin or int(case) > caseMax: + print(f"Invalid case number {case}! Case number must be in the {caseMin}:{caseMax} range. Aborting!") + exit(0) return args @@ -339,7 +252,7 @@ def rpp_test_suite_parser_and_validator(): numRuns = 1 elif(testType == 1): if "--num_runs" not in sys.argv: - numRuns = 1000 #default numRuns for running performance tests + numRuns = 100 #default numRuns for running performance tests outFilePath = outFolderPath + "/OUTPUT_PERFORMANCE_LOGS_HOST_" + timestamp else: print("Invalid TEST_TYPE specified. TEST_TYPE should be 0/1 (0 = Unittests / 1 = Performancetests)") @@ -367,6 +280,9 @@ def rpp_test_suite_parser_and_validator(): subprocess.run(["cmake", scriptPath], cwd=".") # nosec subprocess.run(["make", "-j16"], cwd=".") # nosec +# List of cases supported +supportedCaseList = ['0', '1', '2', '4', '8', '13', '20', '21', '23', '29', '30', '31', '34', '36', '37', '38', '39', '45', '54', '61', '63', '70', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89'] + print("\n\n\n\n\n") print("##########################################################################################") print("Running all layout Inputs...") @@ -374,6 +290,8 @@ def rpp_test_suite_parser_and_validator(): if testType == 0: for case in caseList: + if case not in supportedCaseList: + continue if case == "82" and (("--input_path1" not in sys.argv and "--input_path2" not in sys.argv) or qaMode == 1): srcPath1 = ricapInFilePath srcPath2 = ricapInFilePath @@ -381,11 +299,8 @@ def rpp_test_suite_parser_and_validator(): if qaMode == 1 and case != "82": srcPath1 = inFilePath1 srcPath2 = inFilePath2 - if int(case) < 0 or int(case) > 89: - print(f"Invalid case number {case}. Case number must be in the range of 0 to 89!") - continue for layout in range(3): - dstPathTemp, log_file_layout = process_layout(layout, qaMode, case, dstPath) + dstPathTemp, log_file_layout = process_layout(layout, qaMode, case, dstPath, "host", func_group_finder) if qaMode == 0: if not os.path.isdir(dstPathTemp): @@ -397,8 +312,7 @@ def rpp_test_suite_parser_and_validator(): create_layout_directories(dstPath, layoutDict) else: for case in caseList: - if int(case) < 0 or int(case) > 89: - print(f"Invalid case number {case}. 
Case number must be in the range of 0 to 89!") + if case not in supportedCaseList: continue # if QA mode is enabled overwrite the input folders with the folders used for generating golden outputs if qaMode == 1 and case != "82": @@ -408,36 +322,18 @@ def rpp_test_suite_parser_and_validator(): srcPath1 = ricapInFilePath srcPath2 = ricapInFilePath for layout in range(3): - dstPathTemp, log_file_layout = process_layout(layout, qaMode, case, dstPath) + dstPathTemp, log_file_layout = process_layout(layout, qaMode, case, dstPath, "host", func_group_finder) run_performance_test(loggingFolder, log_file_layout, srcPath1, srcPath2, dstPath, case, numRuns, testType, layout, qaMode, decoderType, batchSize, roiList) # print the results of qa tests -supportedCaseList = ['0', '1', '2', '4', '8', '13', '20', '21', '23', '29', '30', '31', '34', '36', '37', '38', '39', '45', '54', '61', '63', '70', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89'] nonQACaseList = ['8', '24', '54', '84'] # Add cases present in supportedCaseList, but without QA support if qaMode and testType == 0: qaFilePath = os.path.join(outFilePath, "QA_results.txt") checkFile = os.path.isfile(qaFilePath) if checkFile: - f = open(qaFilePath, 'r+') print("---------------------------------- Results of QA Test - Tensor_host ----------------------------------\n") - numLines = 0 - numPassed = 0 - for line in f: - sys.stdout.write(line) - numLines += 1 - if "PASSED" in line: - numPassed += 1 - sys.stdout.flush() - resultsInfo = "\n\nFinal Results of Tests:" - resultsInfo += "\n - Total test cases including all subvariants REQUESTED = " + str(numLines) - resultsInfo += "\n - Total test cases including all subvariants PASSED = " + str(numPassed) - resultsInfo += "\n\nGeneral information on Tensor test suite availability:" - resultsInfo += "\n - Total augmentations supported in Tensor test suite = " + str(len(supportedCaseList)) - resultsInfo += "\n - Total augmentations with golden output QA test support = " + str(len(supportedCaseList) - len(nonQACaseList)) - resultsInfo += "\n - Total augmentations without golden ouput QA test support (due to randomization involved) = " + str(len(nonQACaseList)) - f.write(resultsInfo) - print("\n-------------------------------------------------------------------" + resultsInfo + "\n\n-------------------------------------------------------------------") + print_qa_tests_summary(qaFilePath, supportedCaseList, nonQACaseList) layoutDict = {0:"PKD3", 1:"PLN3", 2:"PLN1"} # unit tests and QA mode disabled @@ -596,63 +492,4 @@ def rpp_test_suite_parser_and_validator(): ] for log_file in log_file_list: - # Opening log file - try: - f = open(log_file,"r") - print("\n\n\nOpened log file -> "+ log_file) - except IOError: - print("Skipping file -> "+ log_file) - continue - - stats = [] - maxVals = [] - minVals = [] - avgVals = [] - functions = [] - frames = [] - prevLine = "" - funcCount = 0 - - # Loop over each line - for line in f: - for functionality_group in functionality_group_list: - if functionality_group in line: - functions.extend([" ", functionality_group, " "]) - frames.extend([" ", " ", " "]) - maxVals.extend([" ", " ", " "]) - minVals.extend([" ", " ", " "]) - avgVals.extend([" ", " ", " "]) - - if "max,min,avg wall times in ms/batch" in line: - split_word_start = "Running " - split_word_end = " " +str(numRuns) - prevLine = prevLine.partition(split_word_start)[2].partition(split_word_end)[0] - if prevLine not in functions: - functions.append(prevLine) - frames.append(numRuns) - split_word_start = 
"max,min,avg wall times in ms/batch = " - split_word_end = "\n" - stats = line.partition(split_word_start)[2].partition(split_word_end)[0].split(",") - maxVals.append(stats[0]) - minVals.append(stats[1]) - avgVals.append(stats[2]) - funcCount += 1 - - if line != "\n": - prevLine = line - - # Print log lengths - print("Functionalities - "+ str(funcCount)) - - # Print summary of log - print("\n\nFunctionality\t\t\t\t\t\tFrames Count\tmax(ms/batch)\t\tmin(ms/batch)\t\tavg(ms/batch)\n") - if len(functions) != 0: - maxCharLength = len(max(functions, key = len)) - functions = [x + (' ' * (maxCharLength - len(x))) for x in functions] - for i, func in enumerate(functions): - print(func + "\t" + str(frames[i]) + "\t\t" + str(maxVals[i]) + "\t" + str(minVals[i]) + "\t" + str(avgVals[i])) - else: - print("No variants under this category") - - # Closing log file - f.close() + print_performance_tests_summary(log_file, functionality_group_list, numRuns) diff --git a/utilities/test_suite/HOST/runTests_voxel.py b/utilities/test_suite/HOST/runTests_voxel.py index d94dcd0f9..998d9d33e 100644 --- a/utilities/test_suite/HOST/runTests_voxel.py +++ b/utilities/test_suite/HOST/runTests_voxel.py @@ -23,11 +23,10 @@ """ import os -import subprocess # nosec -import argparse import sys -import datetime -import shutil +sys.dont_write_bytecode = True +sys.path.append(os.path.join(os.path.dirname( __file__ ), '..' )) +from common import * # Set the timestamp timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") @@ -41,66 +40,6 @@ caseMin = 0 caseMax = 5 -# Check if folder path is empty, if it is the root folder, or if it exists, and remove its contents -def validate_and_remove_contents(path): - if not path: # check if a string is empty - print("Folder path is empty.") - exit() - if path == "/*": # check if the root directory is passed to the function - print("Root folder cannot be deleted.") - exit() - if os.path.exists(path): # check if the folder exists - # Get a list of files and directories within the specified path - items = os.listdir(path) - - if items: - # The directory is not empty, delete its contents - for item in items: - item_path = os.path.join(path, item) - if os.path.isfile(item_path): - os.remove(item_path) - elif os.path.isdir(item_path): - shutil.rmtree(item_path) # Delete the directory if it exists - else: - print("Path is invalid or does not exist.") - exit() - -# Check if the folder is the root folder or exists, and remove the specified subfolders -def validate_and_remove_folders(path, folder): - if path == "/*": # check if the root directory is passed to the function - print("Root folder cannot be deleted.") - exit() - if path and os.path.isdir(path): # checks if directory string is not empty and it exists - output_folders = [folder_name for folder_name in os.listdir(path) if folder_name.startswith(folder)] - - # Loop through each directory and delete it only if it exists - for folder_name in output_folders: - folder_path = os.path.join(path, folder_name) - if os.path.isdir(folder_path): - shutil.rmtree(folder_path) # Delete the directory if it exists - print("Deleted directory:", folder_path) - else: - print("Directory not found:", folder_path) - -# Create layout directories within a destination path based on a layout dictionary -def create_layout_directories(dst_path, layout_dict): - for layout in range(3): - current_layout = layout_dict[layout] - try: - os.makedirs(dst_path + '/' + current_layout) - except FileExistsError: - pass - folder_list = [f for f in os.listdir(dst_path) if 
current_layout.lower() in f] - for folder in folder_list: - os.rename(dst_path + '/' + folder, dst_path + '/' + current_layout + '/' + folder) - -# Validate if a path exists and is a directory -def validate_path(input_path): - if not os.path.exists(input_path): - raise ValueError("path " + input_path +" does not exist.") - if not os.path.isdir(input_path): - raise ValueError("path " + input_path + " is not a directory.") - # Get a list of log files based on a flag for preserving output def get_log_file_list(): return [ @@ -118,36 +57,14 @@ def func_group_finder(case_number): else: return "miscellaneous" -# Generate a directory name based on certain parameters -def directory_name_generator(qaMode, affinity, layoutType, case, path): - if qaMode == 0: - functionality_group = func_group_finder(int(case)) - dst_folder_temp = "{}/rpp_{}_{}_{}".format(path, affinity, layoutType, functionality_group) - else: - dst_folder_temp = path - - return dst_folder_temp - -# Process the layout based on the given parameters and generate the directory name and log file layout. -def process_layout(layout, qaMode, case, dstPath): - if layout == 0: - dstPathTemp = directory_name_generator(qaMode, "host", "pkd3", case, dstPath) - logFileLayout = "pkd3" - elif layout == 1: - dstPathTemp = directory_name_generator(qaMode, "host", "pln3", case, dstPath) - logFileLayout = "pln3" - elif layout == 2: - dstPathTemp = directory_name_generator(qaMode, "host", "pln1", case, dstPath) - logFileLayout = "pln1" - - return dstPathTemp, logFileLayout - def run_unit_test(headerPath, dataPath, dstPathTemp, layout, case, numRuns, testType, qaMode, batchSize): print("\n\n\n\n") print("--------------------------------") print("Running a New Functionality...") print("--------------------------------") bitDepths = [0, 2] + if qaMode: + bitDepths = [2] for bitDepth in bitDepths: print("\n\n\nRunning New Bit Depth...\n-------------------------\n\n") print("\n\n\n\n") @@ -177,8 +94,7 @@ def run_performance_test(loggingFolder, logFileLayout, headerPath, dataPath, dst log_file.write(cleaned_output + '\n') if "max,min,avg wall times" in output: log_file.write("\n") - - print("------------------------------------------------------------------------------------------") + print("------------------------------------------------------------------------------------------") # Parse and validate command-line arguments for the RPP test suite def rpp_test_suite_parser_and_validator(): @@ -287,7 +203,7 @@ def rpp_test_suite_parser_and_validator(): dstPath = outFilePath # Validate DST_FOLDER -validate_and_remove_contents(dstPath) +validate_and_remove_files(dstPath) # Enable extglob if os.path.exists(buildFolderPath + "/build"): @@ -299,6 +215,9 @@ def rpp_test_suite_parser_and_validator(): subprocess.run(["cmake", scriptPath], cwd=".") # nosec subprocess.run(["make", "-j16"], cwd=".") # nosec +# List of cases supported +supportedCaseList = ['0', '1', '2', '3', '5'] + print("\n\n\n\n\n") print("##########################################################################################") print("Running all layout Inputs...") @@ -307,8 +226,10 @@ def rpp_test_suite_parser_and_validator(): bitDepths = [0, 2] if testType == 0: for case in caseList: + if case not in supportedCaseList: + continue for layout in range(3): - dstPathTemp, logFileLayout = process_layout(layout, qaMode, case, dstPath) + dstPathTemp, logFileLayout = process_layout(layout, qaMode, case, dstPath, "host", func_group_finder) if qaMode == 0: if not os.path.isdir(dstPathTemp): 
os.mkdir(dstPathTemp) @@ -316,37 +237,20 @@ def rpp_test_suite_parser_and_validator(): run_unit_test(headerPath, dataPath, dstPathTemp, layout, case, numRuns, testType, qaMode, batchSize) else: for case in caseList: + if case not in supportedCaseList: + continue for layout in range(3): - dstPathTemp, logFileLayout = process_layout(layout, qaMode, case, dstPath) + dstPathTemp, logFileLayout = process_layout(layout, qaMode, case, dstPath, "host", func_group_finder) run_performance_test(loggingFolder, logFileLayout, headerPath, dataPath, dstPathTemp, layout, case, numRuns, testType, qaMode, batchSize) # print the results of qa tests -supportedCaseList = ['0', '1', '2', '3', '5'] nonQACaseList = [] # Add cases present in supportedCaseList, but without QA support if qaMode and testType == 0: qaFilePath = os.path.join(outFilePath, "QA_results.txt") checkFile = os.path.isfile(qaFilePath) if checkFile: - f = open(qaFilePath, 'r+') - print("---------------------------------- Results of QA Test - Tensor_voxel_host ----------------------------------\n") - numLines = 0 - numPassed = 0 - for line in f: - sys.stdout.write(line) - numLines += 1 - if "PASSED" in line: - numPassed += 1 - sys.stdout.flush() - resultsInfo = "\n\nFinal Results of Tests:" - resultsInfo += "\n - Total test cases including all subvariants REQUESTED = " + str(numLines) - resultsInfo += "\n - Total test cases including all subvariants PASSED = " + str(numPassed) - resultsInfo += "\n\nGeneral information on Tensor voxel test suite availability:" - resultsInfo += "\n - Total augmentations supported in Tensor test suite = " + str(len(supportedCaseList)) - resultsInfo += "\n - Total augmentations with golden output QA test support = " + str(len(supportedCaseList) - len(nonQACaseList)) - resultsInfo += "\n - Total augmentations without golden ouput QA test support (due to randomization involved) = " + str(len(nonQACaseList)) - f.write(resultsInfo) - print("\n-------------------------------------------------------------------" + resultsInfo + "\n\n-------------------------------------------------------------------") + print_qa_tests_summary(qaFilePath, supportedCaseList, nonQACaseList) layoutDict = {0:"PKD3", 1:"PLN3", 2:"PLN1"} if (testType == 0 and qaMode == 0): # Unit tests @@ -360,63 +264,4 @@ def rpp_test_suite_parser_and_validator(): ] for log_file in log_file_list: - # Opening log file - try: - f = open(log_file,"r") - print("\n\n\nOpened log file -> "+ log_file) - except IOError: - print("Skipping file -> "+ log_file) - continue - - stats = [] - maxVals = [] - minVals = [] - avgVals = [] - functions = [] - frames = [] - prevLine = "" - funcCount = 0 - - # Loop over each line - for line in f: - for functionality_group in functionality_group_list: - if functionality_group in line: - functions.extend([" ", functionality_group, " "]) - frames.extend([" ", " ", " "]) - maxVals.extend([" ", " ", " "]) - minVals.extend([" ", " ", " "]) - avgVals.extend([" ", " ", " "]) - - if "max,min,avg wall times in ms/batch" in line: - split_word_start = "Running " - split_word_end = " " +str(numRuns) - prevLine = prevLine.partition(split_word_start)[2].partition(split_word_end)[0] - if prevLine not in functions: - functions.append(prevLine) - frames.append(numRuns) - split_word_start = "max,min,avg wall times in ms/batch = " - split_word_end = "\n" - stats = line.partition(split_word_start)[2].partition(split_word_end)[0].split(",") - maxVals.append(stats[0]) - minVals.append(stats[1]) - avgVals.append(stats[2]) - funcCount += 1 - - if line != 
"\n": - prevLine = line - - # Print log lengths - print("Functionalities - "+ str(funcCount)) - - # Print summary of log - print("\n\nFunctionality\t\t\t\t\t\tFrames Count\t\tmax(ms/batch)\t\tmin(ms/batch)\t\tavg(ms/batch)\n") - if len(functions) != 0: - maxCharLength = len(max(functions, key = len)) - functions = [x + (' ' * (maxCharLength - len(x))) for x in functions] - for i, func in enumerate(functions): - print(func + "\t\t\t\t\t\t\t\t" + str(frames[i]) + "\t\t" + str(maxVals[i]) + "\t\t" + str(minVals[i]) + "\t\t" + str(avgVals[i])) - else: - print("No variants under this category") - - # Closing log file - f.close() \ No newline at end of file + print_performance_tests_summary(log_file, functionality_group_list, numRuns) diff --git a/utilities/test_suite/common.py b/utilities/test_suite/common.py new file mode 100644 index 000000000..1646cdc94 --- /dev/null +++ b/utilities/test_suite/common.py @@ -0,0 +1,232 @@ +""" +MIT License + +Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+""" +import os +import subprocess # nosec +import argparse +import sys +import datetime +import shutil + +# Checks if the folder path is empty, or is it a root folder, or if it exists, and remove its contents +def validate_and_remove_files(path): + if not path: # check if a string is empty + print("Folder path is empty.") + exit() + + elif path == "/*": # check if the root directory is passed to the function + print("Root folder cannot be deleted.") + exit() + + elif os.path.exists(path): # check if the folder exists + # Get a list of files and directories within the specified path + items = os.listdir(path) + + if items: + # The directory is not empty, delete its contents + for item in items: + item_path = os.path.join(path, item) + if os.path.isfile(item_path): + os.remove(item_path) + elif os.path.isdir(item_path): + shutil.rmtree(item_path) # Delete the directory if it exists + + else: + print("Path is invalid or does not exist.") + exit() + +# Check if the folder is the root folder or exists, and remove the specified subfolders +def validate_and_remove_folders(path, folder): + if path == "/*": # check if the root directory is passed to the function + print("Root folder cannot be deleted.") + exit() + if path and os.path.isdir(path): # checks if directory string is not empty and it exists + output_folders = [folder_name for folder_name in os.listdir(path) if folder_name.startswith(folder)] + + # Loop through each directory and delete it only if it exists + for folder_name in output_folders: + folder_path = os.path.join(path, folder_name) + if os.path.isdir(folder_path): + shutil.rmtree(folder_path) # Delete the directory if it exists + print("Deleted directory:", folder_path) + else: + print("Directory not found:", folder_path) + +# Check if a case file exists and filter its contents based on certain conditions +def case_file_check(CASE_FILE_PATH, TYPE, TENSOR_TYPE_LIST, new_file, d_counter): + try: + case_file = open(CASE_FILE_PATH,'r') + for line in case_file: + print(line) + if not(line.startswith('"Name"')): + if TYPE in TENSOR_TYPE_LIST: + new_file.write(line) + d_counter[TYPE] = d_counter[TYPE] + 1 + case_file.close() + return True + except IOError: + print("Unable to open case results") + return False + + # Generate a directory name based on certain parameters +def directory_name_generator(qaMode, affinity, layoutType, case, path, func_group_finder): + if qaMode == 0: + functionality_group = func_group_finder(int(case)) + dst_folder_temp = f"{path}/rpp_{affinity}_{layoutType}_{functionality_group}" + else: + dst_folder_temp = path + + return dst_folder_temp + +# Process the layout based on the given parameters and generate the directory name and log file layout. 
+def process_layout(layout, qaMode, case, dstPath, backend, func_group_finder): + if layout == 0: + dstPathTemp = directory_name_generator(qaMode, backend, "pkd3", case, dstPath, func_group_finder) + log_file_layout = "pkd3" + elif layout == 1: + dstPathTemp = directory_name_generator(qaMode, backend, "pln3", case, dstPath, func_group_finder) + log_file_layout = "pln3" + elif layout == 2: + dstPathTemp = directory_name_generator(qaMode, backend, "pln1", case, dstPath, func_group_finder) + log_file_layout = "pln1" + + return dstPathTemp, log_file_layout + +# Validate if a path exists and is a directory +def validate_path(input_path): + if not os.path.exists(input_path): + raise ValueError("path " + input_path +" does not exist.") + if not os.path.isdir(input_path): + raise ValueError("path " + input_path + " is not a directory.") + +# Create layout directories within a destination path based on a layout dictionary +def create_layout_directories(dst_path, layout_dict): + for layout in range(3): + current_layout = layout_dict[layout] + try: + os.makedirs(dst_path + '/' + current_layout) + except FileExistsError: + pass + folder_list = [f for f in os.listdir(dst_path) if current_layout.lower() in f] + for folder in folder_list: + os.rename(dst_path + '/' + folder, dst_path + '/' + current_layout + '/' + folder) + +# Read data from the logs generated from rocprof, process the data +# and generate performance reports based on counters and a list of types +def generate_performance_reports(d_counter, TYPE_LIST, RESULTS_DIR): + import pandas as pd + pd.options.display.max_rows = None + # Generate performance report + for TYPE in TYPE_LIST: + print("\n\n\nKernels tested - ", d_counter[TYPE], "\n\n") + df = pd.read_csv(RESULTS_DIR + "/consolidated_results_" + TYPE + ".stats.csv") + df["AverageMs"] = df["AverageNs"] / 1000000 + dfPrint = df.drop(['Percentage'], axis = 1) + dfPrint["HIP Kernel Name"] = dfPrint.iloc[:,0].str.lstrip("Hip_") + dfPrint_noIndices = dfPrint.astype(str) + dfPrint_noIndices.replace(['0', '0.0'], '', inplace = True) + dfPrint_noIndices = dfPrint_noIndices.to_string(index = False) + print(dfPrint_noIndices) + +# Read the data from QA logs, process the data and print the results as a summary +def print_qa_tests_summary(qaFilePath, supportedCaseList, nonQACaseList): + f = open(qaFilePath, 'r+') + numLines = 0 + numPassed = 0 + for line in f: + sys.stdout.write(line) + numLines += 1 + if "PASSED" in line: + numPassed += 1 + sys.stdout.flush() + resultsInfo = "\n\nFinal Results of Tests:" + resultsInfo += "\n - Total test cases including all subvariants REQUESTED = " + str(numLines) + resultsInfo += "\n - Total test cases including all subvariants PASSED = " + str(numPassed) + resultsInfo += "\n\nGeneral information on Tensor voxel test suite availability:" + resultsInfo += "\n - Total augmentations supported in Tensor test suite = " + str(len(supportedCaseList)) + resultsInfo += "\n - Total augmentations with golden output QA test support = " + str(len(supportedCaseList) - len(nonQACaseList)) + resultsInfo += "\n - Total augmentations without golden ouput QA test support (due to randomization involved) = " + str(len(nonQACaseList)) + f.write(resultsInfo) + print("\n-------------------------------------------------------------------" + resultsInfo + "\n\n-------------------------------------------------------------------") + +# Read the data from performance logs, process the data and print the results as a summary +def print_performance_tests_summary(logFile, 
functionalityGroupList, numRuns): + try: + f = open(logFile, "r") + print("\n\n\nOpened log file -> "+ logFile) + except IOError: + print("Skipping file -> "+ logFile) + return + + stats = [] + maxVals = [] + minVals = [] + avgVals = [] + functions = [] + frames = [] + prevLine = "" + funcCount = 0 + + # Loop over each line + for line in f: + for functionalityGroup in functionalityGroupList: + if functionalityGroup in line: + functions.extend([" ", functionalityGroup, " "]) + frames.extend([" ", " ", " "]) + maxVals.extend([" ", " ", " "]) + minVals.extend([" ", " ", " "]) + avgVals.extend([" ", " ", " "]) + + if "max,min,avg wall times in ms/batch" in line: + splitWordStart = "Running " + splitWordEnd = " " +str(numRuns) + prevLine = prevLine.partition(splitWordStart)[2].partition(splitWordEnd)[0] + if prevLine not in functions: + functions.append(prevLine) + frames.append(numRuns) + splitWordStart = "max,min,avg wall times in ms/batch = " + splitWordEnd = "\n" + stats = line.partition(splitWordStart)[2].partition(splitWordEnd)[0].split(",") + maxVals.append(stats[0]) + minVals.append(stats[1]) + avgVals.append(stats[2]) + funcCount += 1 + + if line != "\n": + prevLine = line + + # Print log lengths + print("Functionalities - "+ str(funcCount)) + + # Print summary of log + print("\n\nFunctionality\t\t\t\t\t\tFrames Count\tmax(ms/batch)\t\tmin(ms/batch)\t\tavg(ms/batch)\n") + if len(functions) != 0: + maxCharLength = len(max(functions, key = len)) + functions = [x + (' ' * (maxCharLength - len(x))) for x in functions] + for i, func in enumerate(functions): + print(func + "\t" + str(frames[i]) + "\t\t" + str(maxVals[i]) + "\t" + str(minVals[i]) + "\t" + str(avgVals[i])) + else: + print("No variants under this category") + + # Closing log file + f.close() \ No newline at end of file
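For reference, a minimal sketch of how a runner script consumes the consolidated helpers in utilities/test_suite/common.py after this patch. The import pattern, the supported-case filtering, and the two summary calls mirror the edits to runAudioTests.py and runTests.py above; the output folder, log-file name, and case lists below are placeholders for illustration, not values taken from any specific script.

import os
import sys
sys.dont_write_bytecode = True
sys.path.append(os.path.join(os.path.dirname(__file__), '..'))   # assumes the script lives one level below common.py
from common import *

# Cases this (hypothetical) suite supports, and the subset lacking golden outputs
supportedCaseList = ['0', '1', '2', '3']
nonQACaseList = []

caseList = ['0', '1', '7']
for case in caseList:
    if case not in supportedCaseList:   # unsupported cases are now skipped silently
        continue
    # ... build and run the test binary for this case ...

# QA summary: reads PASSED/FAILED lines from QA_results.txt and appends the totals
qaFilePath = os.path.join("OUTPUT_FOLDER_PLACEHOLDER", "QA_results.txt")
if os.path.isfile(qaFilePath):
    print_qa_tests_summary(qaFilePath, supportedCaseList, nonQACaseList)

# Performance summary: parses "max,min,avg wall times in ms/batch" lines from a raw log
print_performance_tests_summary("LOG_FILE_PLACEHOLDER.txt", "", 100)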
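The performance-summary parser keys off two lines that the test binaries emit: the "Running <func> <numRuns> times ..." banner and the "max,min,avg wall times in ms/batch = ..." statistics line. The sketch below models that contract in Python only to make the parsed format explicit; the timed body is a stand-in, and the exact float formatting is an assumption (the real statistics come from the RPP calls timed with omp_get_wtime in the C++ files above).

import time

def run_perf_case(funcName, numRuns, batchSize, runBatch):
    maxWallTime, minWallTime, avgWallTime = 0.0, 1e9, 0.0
    print("\nRunning %s %d times (each time with a batch size of %d images) and computing mean statistics..." % (funcName, numRuns, batchSize))
    for _ in range(numRuns):
        start = time.time()
        runBatch()                                  # stand-in for the timed augmentation call
        wallTime = (time.time() - start) * 1000     # wall time in ms for this batch
        maxWallTime = max(maxWallTime, wallTime)
        minWallTime = min(minWallTime, wallTime)
        avgWallTime += wallTime
    avgWallTime /= numRuns
    print("max,min,avg wall times in ms/batch = %0.6f,%0.6f,%0.6f" % (maxWallTime, minWallTime, avgWallTime))

run_perf_case("brightness", 100, 3, lambda: time.sleep(0.001))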