diff --git a/utilities/test_suite/HIP/Tensor_hip.cpp b/utilities/test_suite/HIP/Tensor_hip.cpp index 7bd46b39e..00b7d45ac 100644 --- a/utilities/test_suite/HIP/Tensor_hip.cpp +++ b/utilities/test_suite/HIP/Tensor_hip.cpp @@ -348,727 +348,727 @@ int main(int argc, char **argv) // case-wise RPP API and measure time script for Unit and Performance test printf("\nRunning %s %d times (each time with a batch size of %d images) and computing mean statistics...", func.c_str(), numRuns, batchSize); - for (int perfRunCount = 0; perfRunCount < numRuns; perfRunCount++) + for(int iterCount = 0; iterCount < noOfIterations; iterCount++) { - for(int iterCount = 0; iterCount < noOfIterations; iterCount++) - { - vector::const_iterator imagesPathStart = imageNamesPath.begin() + (iterCount * batchSize); - vector::const_iterator imagesPathEnd = imagesPathStart + batchSize; - vector::const_iterator imageNamesStart = imageNames.begin() + (iterCount * batchSize); - vector::const_iterator imageNamesEnd = imageNamesStart + batchSize; - vector::const_iterator imagesPathSecondStart = imageNamesPathSecond.begin() + (iterCount * batchSize); - vector::const_iterator imagesPathSecondEnd = imagesPathSecondStart + batchSize; - - // Set ROIs for src/dst - set_src_and_dst_roi(imagesPathStart, imagesPathEnd, roiTensorPtrSrc, roiTensorPtrDst, dstImgSizes); + vector::const_iterator imagesPathStart = imageNamesPath.begin() + (iterCount * batchSize); + vector::const_iterator imagesPathEnd = imagesPathStart + batchSize; + vector::const_iterator imageNamesStart = imageNames.begin() + (iterCount * batchSize); + vector::const_iterator imageNamesEnd = imageNamesStart + batchSize; + vector::const_iterator imagesPathSecondStart = imageNamesPathSecond.begin() + (iterCount * batchSize); + vector::const_iterator imagesPathSecondEnd = imagesPathSecondStart + batchSize; + + // Set ROIs for src/dst + set_src_and_dst_roi(imagesPathStart, imagesPathEnd, roiTensorPtrSrc, roiTensorPtrDst, dstImgSizes); + + //Read images + if(decoderType == 0) + read_image_batch_turbojpeg(inputu8, srcDescPtr, imagesPathStart); + else + read_image_batch_opencv(inputu8, srcDescPtr, imagesPathStart); + + // if the input layout requested is PLN3, convert PKD3 inputs to PLN3 for first and second input batch + if (layoutType == 1) + convert_pkd3_to_pln3(inputu8, srcDescPtr); - //Read images + if(dualInputCase) + { if(decoderType == 0) - read_image_batch_turbojpeg(inputu8, srcDescPtr, imagesPathStart); + read_image_batch_turbojpeg(inputu8Second, srcDescPtr, imagesPathSecondStart); else - read_image_batch_opencv(inputu8, srcDescPtr, imagesPathStart); - - // if the input layout requested is PLN3, convert PKD3 inputs to PLN3 for first and second input batch + read_image_batch_opencv(inputu8Second, srcDescPtr, imagesPathSecondStart); if (layoutType == 1) - convert_pkd3_to_pln3(inputu8, srcDescPtr); - - if(dualInputCase) - { - if(decoderType == 0) - read_image_batch_turbojpeg(inputu8Second, srcDescPtr, imagesPathSecondStart); - else - read_image_batch_opencv(inputu8Second, srcDescPtr, imagesPathSecondStart); - if (layoutType == 1) - convert_pkd3_to_pln3(inputu8Second, srcDescPtr); - } + convert_pkd3_to_pln3(inputu8Second, srcDescPtr); + } - // Convert inputs to correponding bit depth specified by user - convert_input_bitdepth(input, input_second, inputu8, inputu8Second, inputBitDepth, ioBufferSize, inputBufferSize, srcDescPtr, dualInputCase, conversionFactor); + // Convert inputs to correponding bit depth specified by user + convert_input_bitdepth(input, input_second, 
inputu8, inputu8Second, inputBitDepth, ioBufferSize, inputBufferSize, srcDescPtr, dualInputCase, conversionFactor); - //copy decoded inputs to hip buffers - CHECK(hipMemcpy(d_input, input, inputBufferSize, hipMemcpyHostToDevice)); - CHECK(hipMemcpy(d_output, output, outputBufferSize, hipMemcpyHostToDevice)); - if(dualInputCase) - CHECK(hipMemcpy(d_input_second, input_second, inputBufferSize, hipMemcpyHostToDevice)); + //copy decoded inputs to hip buffers + CHECK(hipMemcpy(d_input, input, inputBufferSize, hipMemcpyHostToDevice)); + CHECK(hipMemcpy(d_output, output, outputBufferSize, hipMemcpyHostToDevice)); + if(dualInputCase) + CHECK(hipMemcpy(d_input_second, input_second, inputBufferSize, hipMemcpyHostToDevice)); - int roiHeightList[batchSize], roiWidthList[batchSize]; - if(roiList[0] == 0 && roiList[1] == 0 && roiList[2] == 0 && roiList[3] == 0) + int roiHeightList[batchSize], roiWidthList[batchSize]; + if(roiList[0] == 0 && roiList[1] == 0 && roiList[2] == 0 && roiList[3] == 0) + { + for(int i = 0; i < batchSize ; i++) { - for(int i = 0; i < batchSize ; i++) - { - roiList[0] = 10; - roiList[1] = 10; - roiWidthList[i] = roiTensorPtrSrc[i].xywhROI.roiWidth / 2; - roiHeightList[i] = roiTensorPtrSrc[i].xywhROI.roiHeight / 2; - } + roiList[0] = 10; + roiList[1] = 10; + roiWidthList[i] = roiTensorPtrSrc[i].xywhROI.roiWidth / 2; + roiHeightList[i] = roiTensorPtrSrc[i].xywhROI.roiHeight / 2; } - else + } + else + { + for(int i = 0; i < batchSize ; i++) { - for(int i = 0; i < batchSize ; i++) - { - roiWidthList[i] = roiList[2]; - roiHeightList[i] = roiList[3]; - } + roiWidthList[i] = roiList[2]; + roiHeightList[i] = roiList[3]; } + } - // Uncomment to run test case with an xywhROI override - // roi.xywhROI = {0, 0, 25, 25}; - // set_roi_values(&roi, roiTensorPtrSrc, roiTypeSrc, batchSize); - // update_dst_sizes_with_roi(roiTensorPtrSrc, dstImgSizes, roiTypeSrc, batchSize); + // Uncomment to run test case with an xywhROI override + // roi.xywhROI = {0, 0, 25, 25}; + // set_roi_values(&roi, roiTensorPtrSrc, roiTypeSrc, batchSize); + // update_dst_sizes_with_roi(roiTensorPtrSrc, dstImgSizes, roiTypeSrc, batchSize); - // Uncomment to run test case with an ltrbROI override - // roiTypeSrc = RpptRoiType::LTRB; - // roi.ltrbROI = {10, 10, 40, 40}; - // set_roi_values(&roi, roiTensorPtrSrc, roiTypeSrc, batchSize); - // update_dst_sizes_with_roi(roiTensorPtrSrc, dstImgSizes, roiTypeSrc, batchSize); + // Uncomment to run test case with an ltrbROI override + // roiTypeSrc = RpptRoiType::LTRB; + // roi.ltrbROI = {10, 10, 40, 40}; + // set_roi_values(&roi, roiTensorPtrSrc, roiTypeSrc, batchSize); + // update_dst_sizes_with_roi(roiTensorPtrSrc, dstImgSizes, roiTypeSrc, batchSize); + for (int perfRunCount = 0; perfRunCount < numRuns; perfRunCount++) + { double startWallTime, endWallTime; switch (testCase) { - case 0: - { - testCaseName = "brightness"; - - Rpp32f alpha[batchSize]; - Rpp32f beta[batchSize]; - for (i = 0; i < batchSize; i++) + case 0: { - alpha[i] = 1.75; - beta[i] = 50; - } + testCaseName = "brightness"; - startWallTime = omp_get_wtime(); - if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) - rppt_brightness_gpu(d_input, srcDescPtr, d_output, dstDescPtr, alpha, beta, roiTensorPtrSrc, roiTypeSrc, handle); - else - missingFuncFlag = 1; + Rpp32f alpha[batchSize]; + Rpp32f beta[batchSize]; + for (i = 0; i < batchSize; i++) + { + alpha[i] = 1.75; + beta[i] = 50; + } - break; - } - case 1: - { - testCaseName = "gamma_correction"; + startWallTime = 
omp_get_wtime(); + if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) + rppt_brightness_gpu(d_input, srcDescPtr, d_output, dstDescPtr, alpha, beta, roiTensorPtrSrc, roiTypeSrc, handle); + else + missingFuncFlag = 1; - Rpp32f gammaVal[batchSize]; - for (i = 0; i < batchSize; i++) - gammaVal[i] = 1.9; + break; + } + case 1: + { + testCaseName = "gamma_correction"; - startWallTime = omp_get_wtime(); - if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) - rppt_gamma_correction_gpu(d_input, srcDescPtr, d_output, dstDescPtr, gammaVal, roiTensorPtrSrc, roiTypeSrc, handle); - else - missingFuncFlag = 1; + Rpp32f gammaVal[batchSize]; + for (i = 0; i < batchSize; i++) + gammaVal[i] = 1.9; - break; - } - case 2: - { - testCaseName = "blend"; + startWallTime = omp_get_wtime(); + if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) + rppt_gamma_correction_gpu(d_input, srcDescPtr, d_output, dstDescPtr, gammaVal, roiTensorPtrSrc, roiTypeSrc, handle); + else + missingFuncFlag = 1; - Rpp32f alpha[batchSize]; - for (i = 0; i < batchSize; i++) - alpha[i] = 0.4; + break; + } + case 2: + { + testCaseName = "blend"; - startWallTime = omp_get_wtime(); - if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) - rppt_blend_gpu(d_input, d_input_second, srcDescPtr, d_output, dstDescPtr, alpha, roiTensorPtrSrc, roiTypeSrc, handle); - else - missingFuncFlag = 1; + Rpp32f alpha[batchSize]; + for (i = 0; i < batchSize; i++) + alpha[i] = 0.4; - break; - } - case 4: - { - testCaseName = "contrast"; + startWallTime = omp_get_wtime(); + if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) + rppt_blend_gpu(d_input, d_input_second, srcDescPtr, d_output, dstDescPtr, alpha, roiTensorPtrSrc, roiTypeSrc, handle); + else + missingFuncFlag = 1; - Rpp32f contrastFactor[batchSize]; - Rpp32f contrastCenter[batchSize]; - for (i = 0; i < batchSize; i++) - { - contrastFactor[i] = 2.96; - contrastCenter[i] = 128; + break; } + case 4: + { + testCaseName = "contrast"; - startWallTime = omp_get_wtime(); - if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) - rppt_contrast_gpu(d_input, srcDescPtr, d_output, dstDescPtr, contrastFactor, contrastCenter, roiTensorPtrSrc, roiTypeSrc, handle); - else - missingFuncFlag = 1; + Rpp32f contrastFactor[batchSize]; + Rpp32f contrastCenter[batchSize]; + for (i = 0; i < batchSize; i++) + { + contrastFactor[i] = 2.96; + contrastCenter[i] = 128; + } - break; - } - case 13: - { - testCaseName = "exposure"; + startWallTime = omp_get_wtime(); + if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) + rppt_contrast_gpu(d_input, srcDescPtr, d_output, dstDescPtr, contrastFactor, contrastCenter, roiTensorPtrSrc, roiTypeSrc, handle); + else + missingFuncFlag = 1; - Rpp32f exposureFactor[batchSize]; - for (i = 0; i < batchSize; i++) - exposureFactor[i] = 1.4; + break; + } + case 13: + { + testCaseName = "exposure"; - startWallTime = omp_get_wtime(); - if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) - rppt_exposure_gpu(d_input, srcDescPtr, d_output, dstDescPtr, exposureFactor, roiTensorPtrSrc, roiTypeSrc, handle); - else - missingFuncFlag = 1; + Rpp32f exposureFactor[batchSize]; + for (i = 0; i < batchSize; i++) + exposureFactor[i] = 1.4; - break; - } - case 20: - { - testCaseName = "flip"; + startWallTime = omp_get_wtime(); 
+ if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) + rppt_exposure_gpu(d_input, srcDescPtr, d_output, dstDescPtr, exposureFactor, roiTensorPtrSrc, roiTypeSrc, handle); + else + missingFuncFlag = 1; - Rpp32u horizontalFlag[batchSize]; - Rpp32u verticalFlag[batchSize]; - for (i = 0; i < batchSize; i++) - { - horizontalFlag[i] = 1; - verticalFlag[i] = 0; + break; } + case 20: + { + testCaseName = "flip"; - startWallTime = omp_get_wtime(); - if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) - rppt_flip_gpu(d_input, srcDescPtr, d_output, dstDescPtr, horizontalFlag, verticalFlag, roiTensorPtrSrc, roiTypeSrc, handle); - else - missingFuncFlag = 1; + Rpp32u horizontalFlag[batchSize]; + Rpp32u verticalFlag[batchSize]; + for (i = 0; i < batchSize; i++) + { + horizontalFlag[i] = 1; + verticalFlag[i] = 0; + } - break; - } - case 21: - { - testCaseName = "resize"; + startWallTime = omp_get_wtime(); + if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) + rppt_flip_gpu(d_input, srcDescPtr, d_output, dstDescPtr, horizontalFlag, verticalFlag, roiTensorPtrSrc, roiTypeSrc, handle); + else + missingFuncFlag = 1; - for (i = 0; i < batchSize; i++) - { - dstImgSizes[i].width = roiTensorPtrDst[i].xywhROI.roiWidth = roiTensorPtrSrc[i].xywhROI.roiWidth / 2; - dstImgSizes[i].height = roiTensorPtrDst[i].xywhROI.roiHeight = roiTensorPtrSrc[i].xywhROI.roiHeight / 2; + break; } + case 21: + { + testCaseName = "resize"; - startWallTime = omp_get_wtime(); - if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) - rppt_resize_gpu(d_input, srcDescPtr, d_output, dstDescPtr, dstImgSizes, interpolationType, roiTensorPtrDst, roiTypeSrc, handle); - else - missingFuncFlag = 1; + for (i = 0; i < batchSize; i++) + { + dstImgSizes[i].width = roiTensorPtrDst[i].xywhROI.roiWidth = roiTensorPtrSrc[i].xywhROI.roiWidth / 2; + dstImgSizes[i].height = roiTensorPtrDst[i].xywhROI.roiHeight = roiTensorPtrSrc[i].xywhROI.roiHeight / 2; + } - break; - } - case 23: - { - testCaseName = "rotate"; + startWallTime = omp_get_wtime(); + if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) + rppt_resize_gpu(d_input, srcDescPtr, d_output, dstDescPtr, dstImgSizes, interpolationType, roiTensorPtrDst, roiTypeSrc, handle); + else + missingFuncFlag = 1; - if ((interpolationType != RpptInterpolationType::BILINEAR) && (interpolationType != RpptInterpolationType::NEAREST_NEIGHBOR)) - { - missingFuncFlag = 1; break; } + case 23: + { + testCaseName = "rotate"; - Rpp32f angle[batchSize]; - for (i = 0; i < batchSize; i++) - angle[i] = 50; - - startWallTime = omp_get_wtime(); - if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) - rppt_rotate_gpu(d_input, srcDescPtr, d_output, dstDescPtr, angle, interpolationType, roiTensorPtrSrc, roiTypeSrc, handle); - else - missingFuncFlag = 1; + if ((interpolationType != RpptInterpolationType::BILINEAR) && (interpolationType != RpptInterpolationType::NEAREST_NEIGHBOR)) + { + missingFuncFlag = 1; + break; + } - break; - } - case 29: - { - testCaseName = "water"; + Rpp32f angle[batchSize]; + for (i = 0; i < batchSize; i++) + angle[i] = 50; - Rpp32f amplX[batchSize]; - Rpp32f amplY[batchSize]; - Rpp32f freqX[batchSize]; - Rpp32f freqY[batchSize]; - Rpp32f phaseX[batchSize]; - Rpp32f phaseY[batchSize]; + startWallTime = omp_get_wtime(); + if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || 
inputBitDepth == 5) + rppt_rotate_gpu(d_input, srcDescPtr, d_output, dstDescPtr, angle, interpolationType, roiTensorPtrSrc, roiTypeSrc, handle); + else + missingFuncFlag = 1; - for (i = 0; i < batchSize; i++) - { - amplX[i] = 2.0f; - amplY[i] = 5.0f; - freqX[i] = 5.8f; - freqY[i] = 1.2f; - phaseX[i] = 10.0f; - phaseY[i] = 15.0f; + break; } + case 29: + { + testCaseName = "water"; - startWallTime = omp_get_wtime(); - if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) - rppt_water_gpu(d_input, srcDescPtr, d_output, dstDescPtr, amplX, amplY, freqX, freqY, phaseX, phaseY, roiTensorPtrSrc, roiTypeSrc, handle); - else - missingFuncFlag = 1; + Rpp32f amplX[batchSize]; + Rpp32f amplY[batchSize]; + Rpp32f freqX[batchSize]; + Rpp32f freqY[batchSize]; + Rpp32f phaseX[batchSize]; + Rpp32f phaseY[batchSize]; - break; - } - case 30: - { - testCaseName = "non_linear_blend"; + for (i = 0; i < batchSize; i++) + { + amplX[i] = 2.0f; + amplY[i] = 5.0f; + freqX[i] = 5.8f; + freqY[i] = 1.2f; + phaseX[i] = 10.0f; + phaseY[i] = 15.0f; + } - Rpp32f stdDev[batchSize]; - for (i = 0; i < batchSize; i++) - stdDev[i] = 50.0; + startWallTime = omp_get_wtime(); + if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) + rppt_water_gpu(d_input, srcDescPtr, d_output, dstDescPtr, amplX, amplY, freqX, freqY, phaseX, phaseY, roiTensorPtrSrc, roiTypeSrc, handle); + else + missingFuncFlag = 1; - startWallTime = omp_get_wtime(); - if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) - rppt_non_linear_blend_gpu(d_input, d_input_second, srcDescPtr, d_output, dstDescPtr, stdDev, roiTensorPtrSrc, roiTypeSrc, handle); - else - missingFuncFlag = 1; + break; + } + case 30: + { + testCaseName = "non_linear_blend"; - break; - } - case 31: - { - testCaseName = "color_cast"; + Rpp32f stdDev[batchSize]; + for (i = 0; i < batchSize; i++) + stdDev[i] = 50.0; - RpptRGB rgbTensor[batchSize]; - Rpp32f alphaTensor[batchSize]; + startWallTime = omp_get_wtime(); + if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) + rppt_non_linear_blend_gpu(d_input, d_input_second, srcDescPtr, d_output, dstDescPtr, stdDev, roiTensorPtrSrc, roiTypeSrc, handle); + else + missingFuncFlag = 1; - for (i = 0; i < batchSize; i++) - { - rgbTensor[i].R = 0; - rgbTensor[i].G = 0; - rgbTensor[i].B = 100; - alphaTensor[i] = 0.5; + break; } + case 31: + { + testCaseName = "color_cast"; - startWallTime = omp_get_wtime(); - if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) - rppt_color_cast_gpu(d_input, srcDescPtr, d_output, dstDescPtr, rgbTensor, alphaTensor, roiTensorPtrSrc, roiTypeSrc, handle); - else - missingFuncFlag = 1; - - break; - } - case 34: - { - testCaseName = "lut"; - - Rpp32f *lutBuffer; - CHECK(hipHostMalloc(&lutBuffer, 65536 * sizeof(Rpp32f))); - CHECK(hipMemset(lutBuffer, 0, 65536 * sizeof(Rpp32f))); - Rpp8u *lut8u = reinterpret_cast(lutBuffer); - Rpp16f *lut16f = reinterpret_cast(lutBuffer); - Rpp32f *lut32f = reinterpret_cast(lutBuffer); - Rpp8s *lut8s = reinterpret_cast(lutBuffer); - if (inputBitDepth == 0) - for (j = 0; j < 256; j++) - lut8u[j] = (Rpp8u)(255 - j); - else if (inputBitDepth == 3) - for (j = 0; j < 256; j++) - lut16f[j] = (Rpp16f)((255 - j) * ONE_OVER_255); - else if (inputBitDepth == 4) - for (j = 0; j < 256; j++) - lut32f[j] = (Rpp32f)((255 - j) * ONE_OVER_255); - else if (inputBitDepth == 5) - for (j = 0; j < 256; j++) - lut8s[j] = (Rpp8s)(255 - j - 
128); - - startWallTime = omp_get_wtime(); - if (inputBitDepth == 0) - rppt_lut_gpu(d_input, srcDescPtr, d_output, dstDescPtr, lut8u, roiTensorPtrSrc, roiTypeSrc, handle); - else if (inputBitDepth == 3) - rppt_lut_gpu(d_input, srcDescPtr, d_output, dstDescPtr, lut16f, roiTensorPtrSrc, roiTypeSrc, handle); - else if (inputBitDepth == 4) - rppt_lut_gpu(d_input, srcDescPtr, d_output, dstDescPtr, lut32f, roiTensorPtrSrc, roiTypeSrc, handle); - else if (inputBitDepth == 5) - rppt_lut_gpu(d_input, srcDescPtr, d_output, dstDescPtr, lut8s, roiTensorPtrSrc, roiTypeSrc, handle); - else - missingFuncFlag = 1; + RpptRGB rgbTensor[batchSize]; + Rpp32f alphaTensor[batchSize]; - break; + for (i = 0; i < batchSize; i++) + { + rgbTensor[i].R = 0; + rgbTensor[i].G = 0; + rgbTensor[i].B = 100; + alphaTensor[i] = 0.5; + } - CHECK(hipHostFree(lutBuffer)); - } - case 36: - { - testCaseName = "color_twist"; + startWallTime = omp_get_wtime(); + if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) + rppt_color_cast_gpu(d_input, srcDescPtr, d_output, dstDescPtr, rgbTensor, alphaTensor, roiTensorPtrSrc, roiTypeSrc, handle); + else + missingFuncFlag = 1; - Rpp32f brightness[batchSize]; - Rpp32f contrast[batchSize]; - Rpp32f hue[batchSize]; - Rpp32f saturation[batchSize]; - for (i = 0; i < batchSize; i++) + break; + } + case 34: { - brightness[i] = 1.4; - contrast[i] = 0.0; - hue[i] = 60.0; - saturation[i] = 1.9; + testCaseName = "lut"; + + Rpp32f *lutBuffer; + CHECK(hipHostMalloc(&lutBuffer, 65536 * sizeof(Rpp32f))); + CHECK(hipMemset(lutBuffer, 0, 65536 * sizeof(Rpp32f))); + Rpp8u *lut8u = reinterpret_cast(lutBuffer); + Rpp16f *lut16f = reinterpret_cast(lutBuffer); + Rpp32f *lut32f = reinterpret_cast(lutBuffer); + Rpp8s *lut8s = reinterpret_cast(lutBuffer); + if (inputBitDepth == 0) + for (j = 0; j < 256; j++) + lut8u[j] = (Rpp8u)(255 - j); + else if (inputBitDepth == 3) + for (j = 0; j < 256; j++) + lut16f[j] = (Rpp16f)((255 - j) * ONE_OVER_255); + else if (inputBitDepth == 4) + for (j = 0; j < 256; j++) + lut32f[j] = (Rpp32f)((255 - j) * ONE_OVER_255); + else if (inputBitDepth == 5) + for (j = 0; j < 256; j++) + lut8s[j] = (Rpp8s)(255 - j - 128); + + startWallTime = omp_get_wtime(); + if (inputBitDepth == 0) + rppt_lut_gpu(d_input, srcDescPtr, d_output, dstDescPtr, lut8u, roiTensorPtrSrc, roiTypeSrc, handle); + else if (inputBitDepth == 3) + rppt_lut_gpu(d_input, srcDescPtr, d_output, dstDescPtr, lut16f, roiTensorPtrSrc, roiTypeSrc, handle); + else if (inputBitDepth == 4) + rppt_lut_gpu(d_input, srcDescPtr, d_output, dstDescPtr, lut32f, roiTensorPtrSrc, roiTypeSrc, handle); + else if (inputBitDepth == 5) + rppt_lut_gpu(d_input, srcDescPtr, d_output, dstDescPtr, lut8s, roiTensorPtrSrc, roiTypeSrc, handle); + else + missingFuncFlag = 1; + + break; + + CHECK(hipHostFree(lutBuffer)); } + case 36: + { + testCaseName = "color_twist"; - startWallTime = omp_get_wtime(); - if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) - rppt_color_twist_gpu(d_input, srcDescPtr, d_output, dstDescPtr, brightness, contrast, hue, saturation, roiTensorPtrSrc, roiTypeSrc, handle); - else - missingFuncFlag = 1; + Rpp32f brightness[batchSize]; + Rpp32f contrast[batchSize]; + Rpp32f hue[batchSize]; + Rpp32f saturation[batchSize]; + for (i = 0; i < batchSize; i++) + { + brightness[i] = 1.4; + contrast[i] = 0.0; + hue[i] = 60.0; + saturation[i] = 1.9; + } - break; - } - case 37: - { - testCaseName = "crop"; + startWallTime = omp_get_wtime(); + if (inputBitDepth == 0 || 
inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) + rppt_color_twist_gpu(d_input, srcDescPtr, d_output, dstDescPtr, brightness, contrast, hue, saturation, roiTensorPtrSrc, roiTypeSrc, handle); + else + missingFuncFlag = 1; - for (i = 0; i < batchSize; i++) - { - roiTensorPtrDst[i].xywhROI.xy.x = roiList[0]; - roiTensorPtrDst[i].xywhROI.xy.y = roiList[1]; - dstImgSizes[i].width = roiTensorPtrDst[i].xywhROI.roiWidth = roiWidthList[i]; - dstImgSizes[i].height = roiTensorPtrDst[i].xywhROI.roiHeight = roiHeightList[i]; + break; } + case 37: + { + testCaseName = "crop"; - startWallTime = omp_get_wtime(); - if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) - rppt_crop_gpu(d_input, srcDescPtr, d_output, dstDescPtr, roiTensorPtrDst, roiTypeSrc, handle); - else - missingFuncFlag = 1; + for (i = 0; i < batchSize; i++) + { + roiTensorPtrDst[i].xywhROI.xy.x = roiList[0]; + roiTensorPtrDst[i].xywhROI.xy.y = roiList[1]; + dstImgSizes[i].width = roiTensorPtrDst[i].xywhROI.roiWidth = roiWidthList[i]; + dstImgSizes[i].height = roiTensorPtrDst[i].xywhROI.roiHeight = roiHeightList[i]; + } - break; - } - case 38: - { - testCaseName = "crop_mirror_normalize"; - Rpp32f multiplier[batchSize * srcDescPtr->c]; - Rpp32f offset[batchSize * srcDescPtr->c]; - Rpp32u mirror[batchSize]; - if (srcDescPtr->c == 3) + startWallTime = omp_get_wtime(); + if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) + rppt_crop_gpu(d_input, srcDescPtr, d_output, dstDescPtr, roiTensorPtrDst, roiTypeSrc, handle); + else + missingFuncFlag = 1; + + break; + } + case 38: { - Rpp32f meanParam[3] = { 60.0f, 80.0f, 100.0f }; - Rpp32f stdDevParam[3] = { 0.9f, 0.9f, 0.9f }; - Rpp32f offsetParam[3] = { - meanParam[0] / stdDevParam[0], - meanParam[1] / stdDevParam[1], - meanParam[2] / stdDevParam[2] }; - Rpp32f multiplierParam[3] = { 1.0f / stdDevParam[0], 1.0f / stdDevParam[1], 1.0f / stdDevParam[2] }; + testCaseName = "crop_mirror_normalize"; + Rpp32f multiplier[batchSize * srcDescPtr->c]; + Rpp32f offset[batchSize * srcDescPtr->c]; + Rpp32u mirror[batchSize]; + if (srcDescPtr->c == 3) + { + Rpp32f meanParam[3] = { 60.0f, 80.0f, 100.0f }; + Rpp32f stdDevParam[3] = { 0.9f, 0.9f, 0.9f }; + Rpp32f offsetParam[3] = { - meanParam[0] / stdDevParam[0], - meanParam[1] / stdDevParam[1], - meanParam[2] / stdDevParam[2] }; + Rpp32f multiplierParam[3] = { 1.0f / stdDevParam[0], 1.0f / stdDevParam[1], 1.0f / stdDevParam[2] }; + + for (i = 0, j = 0; i < batchSize; i++, j += 3) + { + multiplier[j] = multiplierParam[0]; + offset[j] = offsetParam[0]; + multiplier[j + 1] = multiplierParam[1]; + offset[j + 1] = offsetParam[1]; + multiplier[j + 2] = multiplierParam[2]; + offset[j + 2] = offsetParam[2]; + mirror[i] = 1; + } + } + else if(srcDescPtr->c == 1) + { + Rpp32f meanParam = 100.0f; + Rpp32f stdDevParam = 0.9f; + Rpp32f offsetParam = - meanParam / stdDevParam; + Rpp32f multiplierParam = 1.0f / stdDevParam; + + for (i = 0; i < batchSize; i++) + { + multiplier[i] = multiplierParam; + offset[i] = offsetParam; + mirror[i] = 1; + } + } - for (i = 0, j = 0; i < batchSize; i++, j += 3) + for (i = 0; i < batchSize; i++) { - multiplier[j] = multiplierParam[0]; - offset[j] = offsetParam[0]; - multiplier[j + 1] = multiplierParam[1]; - offset[j + 1] = offsetParam[1]; - multiplier[j + 2] = multiplierParam[2]; - offset[j + 2] = offsetParam[2]; - mirror[i] = 1; + roiTensorPtrDst[i].xywhROI.xy.x = roiList[0]; + roiTensorPtrDst[i].xywhROI.xy.y = roiList[1]; + dstImgSizes[i].width = 
roiTensorPtrDst[i].xywhROI.roiWidth = roiWidthList[i]; + dstImgSizes[i].height = roiTensorPtrDst[i].xywhROI.roiHeight = roiHeightList[i]; } + + startWallTime = omp_get_wtime(); + if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 3 || inputBitDepth == 4 || inputBitDepth == 5) + rppt_crop_mirror_normalize_gpu(d_input, srcDescPtr, d_output, dstDescPtr, offset, multiplier, mirror, roiTensorPtrDst, roiTypeSrc, handle); + else + missingFuncFlag = 1; + + break; } - else if(srcDescPtr->c == 1) + case 39: { - Rpp32f meanParam = 100.0f; - Rpp32f stdDevParam = 0.9f; - Rpp32f offsetParam = - meanParam / stdDevParam; - Rpp32f multiplierParam = 1.0f / stdDevParam; + testCaseName = "resize_crop_mirror"; - for (i = 0; i < batchSize; i++) + if (interpolationType != RpptInterpolationType::BILINEAR) { - multiplier[i] = multiplierParam; - offset[i] = offsetParam; + missingFuncFlag = 1; + break; + } + + Rpp32u mirror[batchSize]; + for (i = 0; i < batchSize; i++) mirror[i] = 1; + + for (i = 0; i < batchSize; i++) + { + roiTensorPtrSrc[i].xywhROI.xy.x = 10; + roiTensorPtrSrc[i].xywhROI.xy.y = 10; + dstImgSizes[i].width = roiTensorPtrSrc[i].xywhROI.roiWidth / 2; + dstImgSizes[i].height = roiTensorPtrSrc[i].xywhROI.roiHeight / 2; + roiTensorPtrDst[i].xywhROI.roiWidth = 50; + roiTensorPtrDst[i].xywhROI.roiHeight = 50; } - } - for (i = 0; i < batchSize; i++) - { - roiTensorPtrDst[i].xywhROI.xy.x = roiList[0]; - roiTensorPtrDst[i].xywhROI.xy.y = roiList[1]; - dstImgSizes[i].width = roiTensorPtrDst[i].xywhROI.roiWidth = roiWidthList[i]; - dstImgSizes[i].height = roiTensorPtrDst[i].xywhROI.roiHeight = roiHeightList[i]; + startWallTime = omp_get_wtime(); + if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 3 || inputBitDepth == 4 || inputBitDepth == 5) + rppt_resize_crop_mirror_gpu(d_input, srcDescPtr, d_output, dstDescPtr, dstImgSizes, interpolationType, mirror, roiTensorPtrDst, roiTypeSrc, handle); + else + missingFuncFlag = 1; + + break; } + case 45: + { + testCaseName = "color_temperature"; - startWallTime = omp_get_wtime(); - if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 3 || inputBitDepth == 4 || inputBitDepth == 5) - rppt_crop_mirror_normalize_gpu(d_input, srcDescPtr, d_output, dstDescPtr, offset, multiplier, mirror, roiTensorPtrDst, roiTypeSrc, handle); - else - missingFuncFlag = 1; + Rpp32s adjustment[batchSize]; + for (i = 0; i < batchSize; i++) + adjustment[i] = 70; - break; - } - case 39: - { - testCaseName = "resize_crop_mirror"; + startWallTime = omp_get_wtime(); + if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) + rppt_color_temperature_gpu(d_input, srcDescPtr, d_output, dstDescPtr, adjustment, roiTensorPtrSrc, roiTypeSrc, handle); + else + missingFuncFlag = 1; - if (interpolationType != RpptInterpolationType::BILINEAR) - { - missingFuncFlag = 1; break; } + case 49: + { + testCaseName = "box_filter"; + Rpp32u kernelSize = additionalParam; - Rpp32u mirror[batchSize]; - for (i = 0; i < batchSize; i++) - mirror[i] = 1; + startWallTime = omp_get_wtime(); + if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) + rppt_box_filter_gpu(d_input, srcDescPtr, d_output, dstDescPtr, kernelSize, roiTensorPtrSrc, roiTypeSrc, handle); + else + missingFuncFlag = 1; - for (i = 0; i < batchSize; i++) - { - roiTensorPtrSrc[i].xywhROI.xy.x = 10; - roiTensorPtrSrc[i].xywhROI.xy.y = 10; - dstImgSizes[i].width = 
roiTensorPtrSrc[i].xywhROI.roiWidth / 2; - dstImgSizes[i].height = roiTensorPtrSrc[i].xywhROI.roiHeight / 2; - roiTensorPtrDst[i].xywhROI.roiWidth = 50; - roiTensorPtrDst[i].xywhROI.roiHeight = 50; + break; } + case 54: + { + testCaseName = "gaussian_filter"; + Rpp32u kernelSize = additionalParam; - startWallTime = omp_get_wtime(); - if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 3 || inputBitDepth == 4 || inputBitDepth == 5) - rppt_resize_crop_mirror_gpu(d_input, srcDescPtr, d_output, dstDescPtr, dstImgSizes, interpolationType, mirror, roiTensorPtrDst, roiTypeSrc, handle); - else - missingFuncFlag = 1; - - break; - } - case 45: - { - testCaseName = "color_temperature"; + Rpp32f stdDevTensor[batchSize]; + for (i = 0; i < batchSize; i++) + { + stdDevTensor[i] = 5.0f; + } - Rpp32s adjustment[batchSize]; - for (i = 0; i < batchSize; i++) - adjustment[i] = 70; + startWallTime = omp_get_wtime(); + if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) + rppt_gaussian_filter_gpu(d_input, srcDescPtr, d_output, dstDescPtr, stdDevTensor, kernelSize, roiTensorPtrSrc, roiTypeSrc, handle); + else + missingFuncFlag = 1; - startWallTime = omp_get_wtime(); - if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) - rppt_color_temperature_gpu(d_input, srcDescPtr, d_output, dstDescPtr, adjustment, roiTensorPtrSrc, roiTypeSrc, handle); - else - missingFuncFlag = 1; + break; + } + case 61: + { + testCaseName = "magnitude"; - break; - } - case 49: - { - testCaseName = "box_filter"; - Rpp32u kernelSize = additionalParam; + startWallTime = omp_get_wtime(); + if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) + rppt_magnitude_gpu(d_input, d_input_second, srcDescPtr, d_output, dstDescPtr, roiTensorPtrSrc, roiTypeSrc, handle); + else + missingFuncFlag = 1; - startWallTime = omp_get_wtime(); - if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) - rppt_box_filter_gpu(d_input, srcDescPtr, d_output, dstDescPtr, kernelSize, roiTensorPtrSrc, roiTypeSrc, handle); - else - missingFuncFlag = 1; + break; + } + case 63: + { + testCaseName = "phase"; - break; - } - case 54: - { - testCaseName = "gaussian_filter"; - Rpp32u kernelSize = additionalParam; + startWallTime = omp_get_wtime(); + if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) + rppt_phase_gpu(d_input, d_input_second, srcDescPtr, d_output, dstDescPtr, roiTensorPtrSrc, roiTypeSrc, handle); + else + missingFuncFlag = 1; - Rpp32f stdDevTensor[batchSize]; - for (i = 0; i < batchSize; i++) - { - stdDevTensor[i] = 5.0f; + break; } + case 70: + { + testCaseName = "copy"; - startWallTime = omp_get_wtime(); - if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) - rppt_gaussian_filter_gpu(d_input, srcDescPtr, d_output, dstDescPtr, stdDevTensor, kernelSize, roiTensorPtrSrc, roiTypeSrc, handle); - else - missingFuncFlag = 1; + startWallTime = omp_get_wtime(); + if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) + rppt_copy_gpu(d_input, srcDescPtr, d_output, dstDescPtr, handle); + else + missingFuncFlag = 1; - break; - } - case 61: - { - testCaseName = "magnitude"; + break; + } + case 80: + { + testCaseName = "resize_mirror_normalize"; - startWallTime = omp_get_wtime(); - if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) - 
rppt_magnitude_gpu(d_input, d_input_second, srcDescPtr, d_output, dstDescPtr, roiTensorPtrSrc, roiTypeSrc, handle); - else - missingFuncFlag = 1; + if (interpolationType != RpptInterpolationType::BILINEAR) + { + missingFuncFlag = 1; + break; + } - break; - } - case 63: - { - testCaseName = "phase"; + for (i = 0; i < batchSize; i++) + { + dstImgSizes[i].width = roiTensorPtrDst[i].xywhROI.roiWidth = roiTensorPtrSrc[i].xywhROI.roiWidth / 2; + dstImgSizes[i].height = roiTensorPtrDst[i].xywhROI.roiHeight = roiTensorPtrSrc[i].xywhROI.roiWidth / 2; + } - startWallTime = omp_get_wtime(); - if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) - rppt_phase_gpu(d_input, d_input_second, srcDescPtr, d_output, dstDescPtr, roiTensorPtrSrc, roiTypeSrc, handle); - else - missingFuncFlag = 1; + Rpp32f mean[batchSize * 3]; + Rpp32f stdDev[batchSize * 3]; + Rpp32u mirror[batchSize]; + for (i = 0, j = 0; i < batchSize; i++, j += 3) + { + mean[j] = 60.0; + stdDev[j] = 1.0; - break; - } - case 70: - { - testCaseName = "copy"; + mean[j + 1] = 80.0; + stdDev[j + 1] = 1.0; - startWallTime = omp_get_wtime(); - if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) - rppt_copy_gpu(d_input, srcDescPtr, d_output, dstDescPtr, handle); - else - missingFuncFlag = 1; + mean[j + 2] = 100.0; + stdDev[j + 2] = 1.0; + mirror[i] = 1; + } - break; - } - case 80: - { - testCaseName = "resize_mirror_normalize"; + startWallTime = omp_get_wtime(); + if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) + rppt_resize_mirror_normalize_gpu(d_input, srcDescPtr, d_output, dstDescPtr, dstImgSizes, interpolationType, mean, stdDev, mirror, roiTensorPtrDst, roiTypeSrc, handle); + else + missingFuncFlag = 1; - if (interpolationType != RpptInterpolationType::BILINEAR) - { - missingFuncFlag = 1; break; } - - for (i = 0; i < batchSize; i++) + case 82: { - dstImgSizes[i].width = roiTensorPtrDst[i].xywhROI.roiWidth = roiTensorPtrSrc[i].xywhROI.roiWidth / 2; - dstImgSizes[i].height = roiTensorPtrDst[i].xywhROI.roiHeight = roiTensorPtrSrc[i].xywhROI.roiWidth / 2; + testCaseName = "ricap"; + + Rpp32u permutationTensor[batchSize * 4]; + if(qaFlag) + init_ricap_qa(maxWidth, maxHeight, batchSize, permutationTensor, roiPtrInputCropRegion); + else + init_ricap(maxWidth, maxHeight, batchSize, permutationTensor, roiPtrInputCropRegion); + + startWallTime = omp_get_wtime(); + if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) + rppt_ricap_gpu(d_input, srcDescPtr, d_output, dstDescPtr, permutationTensor, roiPtrInputCropRegion, roiTypeSrc, handle); + else + missingFuncFlag = 1; + break; } - - Rpp32f mean[batchSize * 3]; - Rpp32f stdDev[batchSize * 3]; - Rpp32u mirror[batchSize]; - for (i = 0, j = 0; i < batchSize; i++, j += 3) + case 83: { - mean[j] = 60.0; - stdDev[j] = 1.0; + testCaseName = "gridmask"; - mean[j + 1] = 80.0; - stdDev[j + 1] = 1.0; + Rpp32u tileWidth = 40; + Rpp32f gridRatio = 0.6; + Rpp32f gridAngle = 0.5; + RpptUintVector2D translateVector; + translateVector.x = 0.0; + translateVector.y = 0.0; - mean[j + 2] = 100.0; - stdDev[j + 2] = 1.0; - mirror[i] = 1; - } + startWallTime = omp_get_wtime(); + if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) + rppt_gridmask_gpu(d_input, srcDescPtr, d_output, dstDescPtr, tileWidth, gridRatio, gridAngle, translateVector, roiTensorPtrSrc, roiTypeSrc, handle); + else + missingFuncFlag = 1; - startWallTime = omp_get_wtime(); - 
if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) - rppt_resize_mirror_normalize_gpu(d_input, srcDescPtr, d_output, dstDescPtr, dstImgSizes, interpolationType, mean, stdDev, mirror, roiTensorPtrDst, roiTypeSrc, handle); - else - missingFuncFlag = 1; + break; + } + case 84: + { + testCaseName = "spatter"; - break; - } - case 82: - { - testCaseName = "ricap"; - - Rpp32u permutationTensor[batchSize * 4]; - if(qaFlag) - init_ricap_qa(maxWidth, maxHeight, batchSize, permutationTensor, roiPtrInputCropRegion); - else - init_ricap(maxWidth, maxHeight, batchSize, permutationTensor, roiPtrInputCropRegion); - - startWallTime = omp_get_wtime(); - if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) - rppt_ricap_gpu(d_input, srcDescPtr, d_output, dstDescPtr, permutationTensor, roiPtrInputCropRegion, roiTypeSrc, handle); - else - missingFuncFlag = 1; - break; - } - case 83: - { - testCaseName = "gridmask"; - - Rpp32u tileWidth = 40; - Rpp32f gridRatio = 0.6; - Rpp32f gridAngle = 0.5; - RpptUintVector2D translateVector; - translateVector.x = 0.0; - translateVector.y = 0.0; - - startWallTime = omp_get_wtime(); - if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) - rppt_gridmask_gpu(d_input, srcDescPtr, d_output, dstDescPtr, tileWidth, gridRatio, gridAngle, translateVector, roiTensorPtrSrc, roiTypeSrc, handle); - else - missingFuncFlag = 1; + RpptRGB spatterColor; - break; - } - case 84: - { - testCaseName = "spatter"; + // Mud Spatter + spatterColor.R = 65; + spatterColor.G = 50; + spatterColor.B = 23; - RpptRGB spatterColor; + // Blood Spatter + // spatterColor.R = 98; - // Mud Spatter - spatterColor.R = 65; - spatterColor.G = 50; - spatterColor.B = 23; + // Ink Spatter + // spatterColor.R = 5; + // spatterColor.G = 20; + // spatterColor.B = 64; - // Blood Spatter - // spatterColor.R = 98; + startWallTime = omp_get_wtime(); + if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) + rppt_spatter_gpu(d_input, srcDescPtr, d_output, dstDescPtr, spatterColor, roiTensorPtrSrc, roiTypeSrc, handle); + else + missingFuncFlag = 1; - // Ink Spatter - // spatterColor.R = 5; - // spatterColor.G = 20; - // spatterColor.B = 64; + break; + } + case 85: + { + testCaseName = "swap_channels"; - startWallTime = omp_get_wtime(); - if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) - rppt_spatter_gpu(d_input, srcDescPtr, d_output, dstDescPtr, spatterColor, roiTensorPtrSrc, roiTypeSrc, handle); - else - missingFuncFlag = 1; + startWallTime = omp_get_wtime(); + if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) + rppt_swap_channels_gpu(d_input, srcDescPtr, d_output, dstDescPtr, handle); + else + missingFuncFlag = 1; - break; - } - case 85: - { - testCaseName = "swap_channels"; - - startWallTime = omp_get_wtime(); - if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) - rppt_swap_channels_gpu(d_input, srcDescPtr, d_output, dstDescPtr, handle); - else - missingFuncFlag = 1; + break; + } + case 86: + { + testCaseName = "color_to_greyscale"; - break; - } - case 86: - { - testCaseName = "color_to_greyscale"; + RpptSubpixelLayout srcSubpixelLayout = RpptSubpixelLayout::RGBtype; - RpptSubpixelLayout srcSubpixelLayout = RpptSubpixelLayout::RGBtype; + startWallTime = omp_get_wtime(); + if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) + 
rppt_color_to_greyscale_gpu(d_input, srcDescPtr, d_output, dstDescPtr, srcSubpixelLayout, handle); + else + missingFuncFlag = 1; - startWallTime = omp_get_wtime(); - if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) - rppt_color_to_greyscale_gpu(d_input, srcDescPtr, d_output, dstDescPtr, srcSubpixelLayout, handle); - else - missingFuncFlag = 1; + break; + } + case 87: + { + testCaseName = "tensor_sum"; - break; - } - case 87: - { - testCaseName = "tensor_sum"; + if(srcDescPtr->c == 1) + reductionFuncResultArrLength = srcDescPtr->n; - if(srcDescPtr->c == 1) - reductionFuncResultArrLength = srcDescPtr->n; + startWallTime = omp_get_wtime(); - startWallTime = omp_get_wtime(); + if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) + rppt_tensor_sum_gpu(d_input, srcDescPtr, reductionFuncResultArr, reductionFuncResultArrLength, roiTensorPtrSrc, roiTypeSrc, handle); + else + missingFuncFlag = 1; - if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) - rppt_tensor_sum_gpu(d_input, srcDescPtr, reductionFuncResultArr, reductionFuncResultArrLength, roiTensorPtrSrc, roiTypeSrc, handle); - else - missingFuncFlag = 1; + break; + } + case 88: + { + testCaseName = "tensor_min"; - break; - } - case 88: - { - testCaseName = "tensor_min"; + startWallTime = omp_get_wtime(); + if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) + rppt_tensor_min_gpu(d_input, srcDescPtr, reductionFuncResultArr, reductionFuncResultArrLength, roiTensorPtrSrc, roiTypeSrc, handle); + else + missingFuncFlag = 1; - startWallTime = omp_get_wtime(); - if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) - rppt_tensor_min_gpu(d_input, srcDescPtr, reductionFuncResultArr, reductionFuncResultArrLength, roiTensorPtrSrc, roiTypeSrc, handle); - else - missingFuncFlag = 1; + break; + } + case 89: + { + testCaseName = "tensor_max"; - break; - } - case 89: - { - testCaseName = "tensor_max"; + startWallTime = omp_get_wtime(); + if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) + rppt_tensor_max_gpu(d_input, srcDescPtr, reductionFuncResultArr, reductionFuncResultArrLength, roiTensorPtrSrc, roiTypeSrc, handle); + else + missingFuncFlag = 1; - startWallTime = omp_get_wtime(); - if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) - rppt_tensor_max_gpu(d_input, srcDescPtr, reductionFuncResultArr, reductionFuncResultArrLength, roiTensorPtrSrc, roiTypeSrc, handle); - else + break; + } + default: missingFuncFlag = 1; - - break; - } - default: - missingFuncFlag = 1; - break; - } + break; + } CHECK(hipDeviceSynchronize()); endWallTime = omp_get_wtime(); @@ -1081,117 +1081,118 @@ int main(int argc, char **argv) maxWallTime = max(maxWallTime, wallTime); minWallTime = min(minWallTime, wallTime); - avgWallTime += wallTime ; - wallTime *= 1000; - if (testType == 0) + avgWallTime += wallTime; + } + wallTime *= 1000; + + if (testType == 0) + { + cout << "\n\nGPU Backend Wall Time: " << wallTime <<" ms/batch"<< endl; + // Display results for reduction functions + if (reductionTypeCase) { - cout << "\n\nGPU Backend Wall Time: " << wallTime <<" ms/batch"<< endl; - // Display results for reduction functions - if (reductionTypeCase) + if(srcDescPtr->c == 3) + printf("\nReduction result (Batch of 3 channel images produces 4 results per image in batch): "); + else if(srcDescPtr->c == 1) { - if(srcDescPtr->c 
== 3) - printf("\nReduction result (Batch of 3 channel images produces 4 results per image in batch): "); - else if(srcDescPtr->c == 1) - { - printf("\nReduction result (Batch of 1 channel images produces 1 result per image in batch): "); - reductionFuncResultArrLength = srcDescPtr->n; - } - - // print reduction functions output array based on different bit depths, and precision desired - int precision = ((dstDescPtr->dataType == RpptDataType::F32) || (dstDescPtr->dataType == RpptDataType::F16)) ? 3 : 0; - if (dstDescPtr->dataType == RpptDataType::U8) - { - if (testCase == 87) - print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); - else - print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); - } - else if (dstDescPtr->dataType == RpptDataType::F16) - { - if (testCase == 87) - print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); - else - print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); - } - else if (dstDescPtr->dataType == RpptDataType::F32) - { - if (testCase == 87) - print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); - else - print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); - } - else if (dstDescPtr->dataType == RpptDataType::I8) - { - if (testCase == 87) - print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); - else - print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); - } - printf("\n"); + printf("\nReduction result (Batch of 1 channel images produces 1 result per image in batch): "); + reductionFuncResultArrLength = srcDescPtr->n; + } - /*Compare the output of the function with golden outputs only if - 1.QA Flag is set - 2.input bit depth 0 (U8) - 3.source and destination layout are the same*/ - if(qaFlag && inputBitDepth == 0 && (srcDescPtr->layout == dstDescPtr->layout) && !(randomOutputCase)) - { - if (testCase == 87) - compare_reduction_output(static_cast(reductionFuncResultArr), testCaseName, srcDescPtr, testCase, dst, scriptPath); - else - compare_reduction_output(static_cast(reductionFuncResultArr), testCaseName, srcDescPtr, testCase, dst, scriptPath); - } + // print reduction functions output array based on different bit depths, and precision desired + int precision = ((dstDescPtr->dataType == RpptDataType::F32) || (dstDescPtr->dataType == RpptDataType::F16)) ? 
3 : 0; + if (dstDescPtr->dataType == RpptDataType::U8) + { + if (testCase == 87) + print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); + else + print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); + } + else if (dstDescPtr->dataType == RpptDataType::F16) + { + if (testCase == 87) + print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); + else + print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); } - else + else if (dstDescPtr->dataType == RpptDataType::F32) { - CHECK(hipMemcpy(output, d_output, outputBufferSize, hipMemcpyDeviceToHost)); + if (testCase == 87) + print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); + else + print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); + } + else if (dstDescPtr->dataType == RpptDataType::I8) + { + if (testCase == 87) + print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); + else + print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); + } + printf("\n"); - // Reconvert other bit depths to 8u for output display purposes - convert_output_bitdepth_to_u8(output, outputu8, inputBitDepth, oBufferSize, outputBufferSize, dstDescPtr, invConversionFactor); + /*Compare the output of the function with golden outputs only if + 1.QA Flag is set + 2.input bit depth 0 (U8) + 3.source and destination layout are the same*/ + if(qaFlag && inputBitDepth == 0 && (srcDescPtr->layout == dstDescPtr->layout) && !(randomOutputCase)) + { + if (testCase == 87) + compare_reduction_output(static_cast(reductionFuncResultArr), testCaseName, srcDescPtr, testCase, dst, scriptPath); + else + compare_reduction_output(static_cast(reductionFuncResultArr), testCaseName, srcDescPtr, testCase, dst, scriptPath); + } + } + else + { + CHECK(hipMemcpy(output, d_output, outputBufferSize, hipMemcpyDeviceToHost)); - // if DEBUG_MODE is set to 1, the output of the first iteration will be dumped to csv files for debugging purposes. 
- if(DEBUG_MODE && iterCount == 0) - { - std::ofstream refFile; - refFile.open(func + ".csv"); - for (int i = 0; i < oBufferSize; i++) - refFile << static_cast(*(outputu8 + i)) << ","; - refFile.close(); - } + // Reconvert other bit depths to 8u for output display purposes + convert_output_bitdepth_to_u8(output, outputu8, inputBitDepth, oBufferSize, outputBufferSize, dstDescPtr, invConversionFactor); - /*Compare the output of the function with golden outputs only if - 1.QA Flag is set - 2.input bit depth 0 (Input U8 && Output U8) - 3.source and destination layout are the same - 4.augmentation case does not generate random output*/ - if(qaFlag && inputBitDepth == 0 && ((srcDescPtr->layout == dstDescPtr->layout) || pln1OutTypeCase) && !(randomOutputCase)) - compare_output(outputu8, testCaseName, srcDescPtr, dstDescPtr, dstImgSizes, batchSize, interpolationTypeName, noiseTypeName, testCase, dst, scriptPath); - - // Calculate exact dstROI in XYWH format for OpenCV dump - if (roiTypeSrc == RpptRoiType::LTRB) - convert_roi(roiTensorPtrDst, RpptRoiType::XYWH, dstDescPtr->n); - - // Check if the ROI values for each input is within the bounds of the max buffer allocated - RpptROI roiDefault; - RpptROIPtr roiPtrDefault = &roiDefault; - roiPtrDefault->xywhROI = {0, 0, static_cast(dstDescPtr->w), static_cast(dstDescPtr->h)}; - for (int i = 0; i < dstDescPtr->n; i++) - { - roiTensorPtrDst[i].xywhROI.roiWidth = std::min(roiPtrDefault->xywhROI.roiWidth - roiTensorPtrDst[i].xywhROI.xy.x, roiTensorPtrDst[i].xywhROI.roiWidth); - roiTensorPtrDst[i].xywhROI.roiHeight = std::min(roiPtrDefault->xywhROI.roiHeight - roiTensorPtrDst[i].xywhROI.xy.y, roiTensorPtrDst[i].xywhROI.roiHeight); - roiTensorPtrDst[i].xywhROI.xy.x = std::max(roiPtrDefault->xywhROI.xy.x, roiTensorPtrDst[i].xywhROI.xy.x); - roiTensorPtrDst[i].xywhROI.xy.y = std::max(roiPtrDefault->xywhROI.xy.y, roiTensorPtrDst[i].xywhROI.xy.y); - } + // if DEBUG_MODE is set to 1, the output of the first iteration will be dumped to csv files for debugging purposes. 
+ if(DEBUG_MODE && iterCount == 0) + { + std::ofstream refFile; + refFile.open(func + ".csv"); + for (int i = 0; i < oBufferSize; i++) + refFile << static_cast(*(outputu8 + i)) << ","; + refFile.close(); + } - // Convert any PLN3 outputs to the corresponding PKD3 version for OpenCV dump - if (layoutType == 0 || layoutType == 1) - { - if ((dstDescPtr->c == 3) && (dstDescPtr->layout == RpptLayout::NCHW)) - convert_pln3_to_pkd3(outputu8, dstDescPtr); - } - // OpenCV dump (if testType is unit test and QA mode is not set) - if(!qaFlag) - write_image_batch_opencv(dst, outputu8, dstDescPtr, imageNamesStart, dstImgSizes, MAX_IMAGE_DUMP); + /*Compare the output of the function with golden outputs only if + 1.QA Flag is set + 2.input bit depth 0 (Input U8 && Output U8) + 3.source and destination layout are the same + 4.augmentation case does not generate random output*/ + if(qaFlag && inputBitDepth == 0 && ((srcDescPtr->layout == dstDescPtr->layout) || pln1OutTypeCase) && !(randomOutputCase)) + compare_output(outputu8, testCaseName, srcDescPtr, dstDescPtr, dstImgSizes, batchSize, interpolationTypeName, noiseTypeName, testCase, dst, scriptPath); + + // Calculate exact dstROI in XYWH format for OpenCV dump + if (roiTypeSrc == RpptRoiType::LTRB) + convert_roi(roiTensorPtrDst, RpptRoiType::XYWH, dstDescPtr->n); + + // Check if the ROI values for each input is within the bounds of the max buffer allocated + RpptROI roiDefault; + RpptROIPtr roiPtrDefault = &roiDefault; + roiPtrDefault->xywhROI = {0, 0, static_cast(dstDescPtr->w), static_cast(dstDescPtr->h)}; + for (int i = 0; i < dstDescPtr->n; i++) + { + roiTensorPtrDst[i].xywhROI.roiWidth = std::min(roiPtrDefault->xywhROI.roiWidth - roiTensorPtrDst[i].xywhROI.xy.x, roiTensorPtrDst[i].xywhROI.roiWidth); + roiTensorPtrDst[i].xywhROI.roiHeight = std::min(roiPtrDefault->xywhROI.roiHeight - roiTensorPtrDst[i].xywhROI.xy.y, roiTensorPtrDst[i].xywhROI.roiHeight); + roiTensorPtrDst[i].xywhROI.xy.x = std::max(roiPtrDefault->xywhROI.xy.x, roiTensorPtrDst[i].xywhROI.xy.x); + roiTensorPtrDst[i].xywhROI.xy.y = std::max(roiPtrDefault->xywhROI.xy.y, roiTensorPtrDst[i].xywhROI.xy.y); + } + + // Convert any PLN3 outputs to the corresponding PKD3 version for OpenCV dump + if (layoutType == 0 || layoutType == 1) + { + if ((dstDescPtr->c == 3) && (dstDescPtr->layout == RpptLayout::NCHW)) + convert_pln3_to_pkd3(outputu8, dstDescPtr); } + // OpenCV dump (if testType is unit test and QA mode is not set) + if(!qaFlag) + write_image_batch_opencv(dst, outputu8, dstDescPtr, imageNamesStart, dstImgSizes, MAX_IMAGE_DUMP); } } } diff --git a/utilities/test_suite/HIP/Tensor_voxel_hip.cpp b/utilities/test_suite/HIP/Tensor_voxel_hip.cpp index f9f3b9f54..749f51edf 100644 --- a/utilities/test_suite/HIP/Tensor_voxel_hip.cpp +++ b/utilities/test_suite/HIP/Tensor_voxel_hip.cpp @@ -154,8 +154,7 @@ int main(int argc, char * argv[]) // Run case-wise RPP API and measure time int missingFuncFlag = 0; - double startWallTime, endWallTime, wallTime; - double maxWallTime = 0, minWallTime = 5000, avgWallTime = 0; + double maxWallTime = 0, minWallTime = 5000, avgWallTime = 0, wallTime = 0; int noOfIterations = (int)noOfFiles / batchSize; string testCaseName; @@ -173,63 +172,64 @@ int main(int argc, char * argv[]) } printf("\nRunning %s %d times (each time with a batch size of %d images) and computing mean statistics...", funcName.c_str(), numRuns, batchSize); - for (int perfRunCount = 0; perfRunCount < numRuns; perfRunCount++) + for(int iterCount = 0; iterCount < noOfIterations; iterCount++) { - 
for(int iterCount = 0; iterCount < noOfIterations; iterCount++) - { - vector::const_iterator dataFilePathStart = dataFilePath.begin() + (iterCount * batchSize); - vector::const_iterator dataFilePathEnd = dataFilePathStart + batchSize; - nifti_1_header *niftiHeaderTemp = niftiHeader + batchSize * iterCount; + vector::const_iterator dataFilePathStart = dataFilePath.begin() + (iterCount * batchSize); + vector::const_iterator dataFilePathEnd = dataFilePathStart + batchSize; + nifti_1_header *niftiHeaderTemp = niftiHeader + batchSize * iterCount; - read_nifti_data(dataFilePathStart, dataFilePathEnd, niftiDataArray, niftiHeaderTemp); + read_nifti_data(dataFilePathStart, dataFilePathEnd, niftiDataArray, niftiHeaderTemp); - // optionally pick full image as ROI or a smaller slice of the 3D tensor in X/Y/Z dimensions - for(int i = 0; i < batchSize; i++) - { - // option 1 - test using roi as the whole 3D image - not sliced (example for 240 x 240 x 155 x 1) - roiGenericSrcPtr[i].xyzwhdROI.xyz.x = 0; // start X dim = 0 - roiGenericSrcPtr[i].xyzwhdROI.xyz.y = 0; // start Y dim = 0 - roiGenericSrcPtr[i].xyzwhdROI.xyz.z = 0; // start Z dim = 0 - roiGenericSrcPtr[i].xyzwhdROI.roiWidth = niftiHeaderTemp[i].dim[1]; // length in X dim - roiGenericSrcPtr[i].xyzwhdROI.roiHeight = niftiHeaderTemp[i].dim[2]; // length in Y dim - roiGenericSrcPtr[i].xyzwhdROI.roiDepth = niftiHeaderTemp[i].dim[3]; // length in Z dim - // option 2 - test using roi as a smaller 3D tensor slice - sliced in X, Y and Z dims (example for 240 x 240 x 155 x 1) - // roiGenericSrcPtr[i].xyzwhdROI.xyz.x = niftiHeader.dim[1] / 4; // start X dim = 60 - // roiGenericSrcPtr[i].xyzwhdROI.xyz.y = niftiHeader[i].dim[2] / 4; // start Y dim = 60 - // roiGenericSrcPtr[i].xyzwhdROI.xyz.z = niftiHeader[i].dim[3] / 3; // start Z dim = 51 - // roiGenericSrcPtr[i].xyzwhdROI.roiWidth = niftiHeader[i].dim[1] / 2; // length in X dim = 120 - // roiGenericSrcPtr[i].xyzwhdROI.roiHeight = niftiHeader[i].dim[2] / 2; // length in Y dim = 120 - // roiGenericSrcPtr[i].xyzwhdROI.roiDepth = niftiHeader[i].dim[3] / 3; // length in Z dim = 51 - // option 3 - test using roi as a smaller 3D tensor slice - sliced in only Z dim (example for 240 x 240 x 155 x 1) - // roiGenericSrcPtr[i].xyzwhdROI.xyz.x = 0; // start X dim = 0 - // roiGenericSrcPtr[i].xyzwhdROI.xyz.y = 0; // start Y dim = 0 - // roiGenericSrcPtr[i].xyzwhdROI.xyz.z = niftiHeader[i].dim[3] / 3; // start Z dim = 51 - // roiGenericSrcPtr[i].xyzwhdROI.roiWidth = niftiHeader[i].dim[1]; // length in X dim = 240 - // roiGenericSrcPtr[i].xyzwhdROI.roiHeight = niftiHeader[i].dim[2]; // length in Y dim = 240 - // roiGenericSrcPtr[i].xyzwhdROI.roiDepth = niftiHeader[i].dim[3] / 3; // length in Z dim = 51 - // option 4 - test using roi as a smaller 3D tensor slice - sliced in only X and Z dim (example for 240 x 240 x 155 x 1) - // roiGenericSrcPtr[i].xyzwhdROI.xyz.x = niftiHeader[i].dim[1] / 5; // start X dim = 48 - // roiGenericSrcPtr[i].xyzwhdROI.xyz.y = 0; // start Y dim = 0 - // roiGenericSrcPtr[i].xyzwhdROI.xyz.z = niftiHeader[i].dim[3] / 3; // start Z dim = 51 - // roiGenericSrcPtr[i].xyzwhdROI.roiWidth = niftiHeader[i].dim[1] * 3 / 5; // length in X dim = 144 - // roiGenericSrcPtr[i].xyzwhdROI.roiHeight = niftiHeader[i].dim[2]; // length in Y dim = 240 - // roiGenericSrcPtr[i].xyzwhdROI.roiDepth = niftiHeader[i].dim[3] / 3; // length in Z dim = 51 - } + // optionally pick full image as ROI or a smaller slice of the 3D tensor in X/Y/Z dimensions + for(int i = 0; i < batchSize; i++) + { + // option 1 - test using roi 
as the whole 3D image - not sliced (example for 240 x 240 x 155 x 1) + roiGenericSrcPtr[i].xyzwhdROI.xyz.x = 0; // start X dim = 0 + roiGenericSrcPtr[i].xyzwhdROI.xyz.y = 0; // start Y dim = 0 + roiGenericSrcPtr[i].xyzwhdROI.xyz.z = 0; // start Z dim = 0 + roiGenericSrcPtr[i].xyzwhdROI.roiWidth = niftiHeaderTemp[i].dim[1]; // length in X dim + roiGenericSrcPtr[i].xyzwhdROI.roiHeight = niftiHeaderTemp[i].dim[2]; // length in Y dim + roiGenericSrcPtr[i].xyzwhdROI.roiDepth = niftiHeaderTemp[i].dim[3]; // length in Z dim + // option 2 - test using roi as a smaller 3D tensor slice - sliced in X, Y and Z dims (example for 240 x 240 x 155 x 1) + // roiGenericSrcPtr[i].xyzwhdROI.xyz.x = niftiHeader.dim[1] / 4; // start X dim = 60 + // roiGenericSrcPtr[i].xyzwhdROI.xyz.y = niftiHeader[i].dim[2] / 4; // start Y dim = 60 + // roiGenericSrcPtr[i].xyzwhdROI.xyz.z = niftiHeader[i].dim[3] / 3; // start Z dim = 51 + // roiGenericSrcPtr[i].xyzwhdROI.roiWidth = niftiHeader[i].dim[1] / 2; // length in X dim = 120 + // roiGenericSrcPtr[i].xyzwhdROI.roiHeight = niftiHeader[i].dim[2] / 2; // length in Y dim = 120 + // roiGenericSrcPtr[i].xyzwhdROI.roiDepth = niftiHeader[i].dim[3] / 3; // length in Z dim = 51 + // option 3 - test using roi as a smaller 3D tensor slice - sliced in only Z dim (example for 240 x 240 x 155 x 1) + // roiGenericSrcPtr[i].xyzwhdROI.xyz.x = 0; // start X dim = 0 + // roiGenericSrcPtr[i].xyzwhdROI.xyz.y = 0; // start Y dim = 0 + // roiGenericSrcPtr[i].xyzwhdROI.xyz.z = niftiHeader[i].dim[3] / 3; // start Z dim = 51 + // roiGenericSrcPtr[i].xyzwhdROI.roiWidth = niftiHeader[i].dim[1]; // length in X dim = 240 + // roiGenericSrcPtr[i].xyzwhdROI.roiHeight = niftiHeader[i].dim[2]; // length in Y dim = 240 + // roiGenericSrcPtr[i].xyzwhdROI.roiDepth = niftiHeader[i].dim[3] / 3; // length in Z dim = 51 + // option 4 - test using roi as a smaller 3D tensor slice - sliced in only X and Z dim (example for 240 x 240 x 155 x 1) + // roiGenericSrcPtr[i].xyzwhdROI.xyz.x = niftiHeader[i].dim[1] / 5; // start X dim = 48 + // roiGenericSrcPtr[i].xyzwhdROI.xyz.y = 0; // start Y dim = 0 + // roiGenericSrcPtr[i].xyzwhdROI.xyz.z = niftiHeader[i].dim[3] / 3; // start Z dim = 51 + // roiGenericSrcPtr[i].xyzwhdROI.roiWidth = niftiHeader[i].dim[1] * 3 / 5; // length in X dim = 144 + // roiGenericSrcPtr[i].xyzwhdROI.roiHeight = niftiHeader[i].dim[2]; // length in Y dim = 240 + // roiGenericSrcPtr[i].xyzwhdROI.roiDepth = niftiHeader[i].dim[3] / 3; // length in Z dim = 51 + } - // Convert default NIFTI_DATATYPE unstrided buffer to RpptDataType::F32 strided buffer - convert_input_niftitype_to_Rpp32f_generic(niftiDataArray, niftiHeaderTemp, inputF32 , descriptorPtr3D); + // Convert default NIFTI_DATATYPE unstrided buffer to RpptDataType::F32 strided buffer + convert_input_niftitype_to_Rpp32f_generic(niftiDataArray, niftiHeaderTemp, inputF32 , descriptorPtr3D); - // Typecast input from F32 to U8 if input bitdepth requested is U8 - if (inputBitDepth == 0) - { - for(int i = 0; i < iBufferSizeU8; i++) - inputU8[i] = std::min(std::max(static_cast(inputF32[i]), static_cast(0)), static_cast(255)); - CHECK(hipMemcpy(d_inputU8, inputU8, iBufferSizeU8, hipMemcpyHostToDevice)); - } + // Typecast input from F32 to U8 if input bitdepth requested is U8 + if (inputBitDepth == 0) + { + for(int i = 0; i < iBufferSizeU8; i++) + inputU8[i] = std::min(std::max(static_cast(inputF32[i]), static_cast(0)), static_cast(255)); + CHECK(hipMemcpy(d_inputU8, inputU8, iBufferSizeU8, hipMemcpyHostToDevice)); + } - //Copy input buffer to hip - 
CHECK(hipMemcpy(d_inputF32, inputF32, iBufferSizeInBytes, hipMemcpyHostToDevice)); + //Copy input buffer to hip + CHECK(hipMemcpy(d_inputF32, inputF32, iBufferSizeInBytes, hipMemcpyHostToDevice)); + for (int perfRunCount = 0; perfRunCount < numRuns; perfRunCount++) + { + double startWallTime, endWallTime; switch (testCase) { case 0: @@ -327,108 +327,110 @@ int main(int argc, char * argv[]) maxWallTime = std::max(maxWallTime, wallTime); minWallTime = std::min(minWallTime, wallTime); avgWallTime += wallTime; - wallTime *= 1000; - if (missingFuncFlag == 1) + } + + wallTime *= 1000; + if (missingFuncFlag == 1) + { + printf("\nThe functionality doesn't yet exist in RPP\n"); + return -1; + } + + // Copy output buffer to host + CHECK(hipMemcpy(outputF32, d_outputF32, oBufferSizeInBytes, hipMemcpyDeviceToHost)); + if(testType == 0) + { + cout << "\n\nGPU Backend Wall Time: " << wallTime <<" ms per batch"<< endl; + if(DEBUG_MODE) { - printf("\nThe functionality doesn't yet exist in RPP\n"); - return -1; + std::ofstream refFile; + std::string refFileName; + if(layoutType == 0) + refFileName = testCaseName + "_nifti_hip_pkd3.csv"; + else if(layoutType == 1) + refFileName = testCaseName + "_nifti_hip_pln3.csv"; + else + refFileName = testCaseName + "_nifti_hip_pln1.csv"; + refFile.open(refFileName); + for (int i = 0; i < oBufferSize; i++) + refFile << *(outputF32 + i) << ","; + refFile.close(); } - // Copy output buffer to host - CHECK(hipMemcpy(outputF32, d_outputF32, oBufferSizeInBytes, hipMemcpyDeviceToHost)); - if(testType == 0) - { - cout << "\n\nGPU Backend Wall Time: " << wallTime <<" ms per nifti file"<< endl; - if(DEBUG_MODE) - { - std::ofstream refFile; - std::string refFileName; - if(layoutType == 0) - refFileName = testCaseName + "_nifti_hip_pkd3.csv"; - else if(layoutType == 1) - refFileName = testCaseName + "_nifti_hip_pln3.csv"; - else - refFileName = testCaseName + "_nifti_hip_pln1.csv"; - refFile.open(refFileName); - for (int i = 0; i < oBufferSize; i++) - refFile << *(outputF32 + i) << ","; - refFile.close(); - } - if(inputBitDepth == 0) - { - Rpp64u bufferLength = iBufferSize * sizeof(Rpp8u) + descriptorPtr3D->offsetInBytes; - CHECK(hipMemcpy(outputU8, d_outputU8, bufferLength, hipMemcpyDeviceToHost)); + if(inputBitDepth == 0) + { + Rpp64u bufferLength = iBufferSize * sizeof(Rpp8u) + descriptorPtr3D->offsetInBytes; + CHECK(hipMemcpy(outputU8, d_outputU8, bufferLength, hipMemcpyDeviceToHost)); - // Copy U8 buffer to F32 buffer for display purposes - for(int i = 0; i < bufferLength; i++) - outputF32[i] = static_cast(outputU8[i]); - } + // Copy U8 buffer to F32 buffer for display purposes + for(int i = 0; i < bufferLength; i++) + outputF32[i] = static_cast(outputU8[i]); + } - /*Compare the output of the function with golden outputs only if - 1.QA Flag is set - 2.input bit depth 2 (F32)*/ - if(qaFlag && inputBitDepth == 2) - compare_output(outputF32, oBufferSize, testCaseName, layoutType, descriptorPtr3D, (RpptRoiXyzwhd *)roiGenericSrcPtr, dstPath, scriptPath); - else + /*Compare the output of the function with golden outputs only if + 1.QA Flag is set + 2.input bit depth 2 (F32)*/ + if(qaFlag && inputBitDepth == 2) + compare_output(outputF32, oBufferSize, testCaseName, layoutType, descriptorPtr3D, (RpptRoiXyzwhd *)roiGenericSrcPtr, dstPath, scriptPath); + else + { + for(int batchCount = 0; batchCount < batchSize; batchCount++) { - for(int batchCount = 0; batchCount < batchSize; batchCount++) + int index = iterCount * batchSize + batchCount; + Rpp32f *outputTemp = outputF32 + batchCount 
* descriptorPtr3D->strides[0]; + for(int i = 0; i < numChannels; i++) // temporary changes to process pln3 { - int index = iterCount * batchSize + batchCount; - Rpp32f *outputTemp = outputF32 + batchCount * descriptorPtr3D->strides[0]; - for(int i = 0; i < numChannels; i++) // temporary changes to process pln3 + int xyFrameSize = niftiHeaderTemp[batchCount].dim[1] * niftiHeaderTemp[batchCount].dim[2]; + int xyFrameSizeROI = roiGenericSrcPtr[batchCount].xyzwhdROI.roiWidth * roiGenericSrcPtr[batchCount].xyzwhdROI.roiHeight; + + uint dataSize = niftiHeaderTemp[batchCount].dim[1] * niftiHeaderTemp[batchCount].dim[2] * niftiHeaderTemp[batchCount].dim[3]; + uchar *niftiDataU8 = (uchar *) malloc(dataSize * sizeof(uchar)); + uchar *outputBufferOpenCV = (uchar *)calloc(xyFrameSizeROI, sizeof(uchar)); + + // Convert RpptDataType::F32 strided buffer to default NIFTI_DATATYPE unstrided buffer + Rpp64u increment; + if (descriptorPtr3D->layout == RpptLayout::NCDHW) + increment = (Rpp64u)descriptorPtr3D->strides[1]; + else + increment = 1; + convert_output_Rpp32f_to_niftitype_generic(outputTemp + i * increment, descriptorPtr3D, niftiDataArray[batchCount], &niftiHeaderTemp[batchCount]); + NIFTI_DATATYPE min = niftiDataArray[batchCount][0]; + NIFTI_DATATYPE max = niftiDataArray[batchCount][0]; + for (int i = 0; i < dataSize; i++) + { + min = std::min(min, niftiDataArray[batchCount][i]); + max = std::max(max, niftiDataArray[batchCount][i]); + } + Rpp32f multiplier = 255.0f / (max - min); + for (int i = 0; i < dataSize; i++) + niftiDataU8[i] = (uchar)((niftiDataArray[batchCount][i] - min) * multiplier); + + uchar *niftiDataU8Temp = niftiDataU8; + for (int zPlane = roiGenericSrcPtr[batchCount].xyzwhdROI.xyz.z; zPlane < roiGenericSrcPtr[batchCount].xyzwhdROI.xyz.z + roiGenericSrcPtr[batchCount].xyzwhdROI.roiDepth; zPlane++) + { + write_image_from_nifti_opencv(niftiDataU8Temp, niftiHeaderTemp[batchCount].dim[1], (RpptRoiXyzwhd *)roiGenericSrcPtr, outputBufferOpenCV, zPlane, i, batchCount, dstPath, testCaseName, index); + niftiDataU8Temp += xyFrameSize; + } + + write_nifti_file(&niftiHeaderTemp[batchCount], niftiDataArray[batchCount], index, i, dstPath, testCaseName); + + if(i == 0) + { + std::string command = "convert -delay 10 -loop 0 " + std::string(dstPath) + "/" + testCaseName + "_nifti_" + std::to_string(index) + "_zPlane_chn_0_*.jpg " + std::string(dstPath) + "/" + testCaseName + "_niftiOutput_" + std::to_string(index) + "_chn_" + std::to_string(i) + ".gif"; + system(command.c_str()); + } + if(i == 1) + { + std::string command = "convert -delay 10 -loop 0 " + std::string(dstPath) + "/" + testCaseName + "_nifti_" + std::to_string(index) + "_zPlane_chn_1_*.jpg " + std::string(dstPath) + "/" + testCaseName + "_niftiOutput_" + std::to_string(index) + "_chn_" + std::to_string(i) + ".gif"; + system(command.c_str()); + } + if(i == 2) { - int xyFrameSize = niftiHeaderTemp[batchCount].dim[1] * niftiHeaderTemp[batchCount].dim[2]; - int xyFrameSizeROI = roiGenericSrcPtr[batchCount].xyzwhdROI.roiWidth * roiGenericSrcPtr[batchCount].xyzwhdROI.roiHeight; - - uint dataSize = niftiHeaderTemp[batchCount].dim[1] * niftiHeaderTemp[batchCount].dim[2] * niftiHeaderTemp[batchCount].dim[3]; - uchar *niftiDataU8 = (uchar *) malloc(dataSize * sizeof(uchar)); - uchar *outputBufferOpenCV = (uchar *)calloc(xyFrameSizeROI, sizeof(uchar)); - - // Convert RpptDataType::F32 strided buffer to default NIFTI_DATATYPE unstrided buffer - Rpp64u increment; - if (descriptorPtr3D->layout == RpptLayout::NCDHW) - increment = 
(Rpp64u)descriptorPtr3D->strides[1]; - else - increment = 1; - convert_output_Rpp32f_to_niftitype_generic(outputTemp + i * increment, descriptorPtr3D, niftiDataArray[batchCount], &niftiHeaderTemp[batchCount]); - NIFTI_DATATYPE min = niftiDataArray[batchCount][0]; - NIFTI_DATATYPE max = niftiDataArray[batchCount][0]; - for (int i = 0; i < dataSize; i++) - { - min = std::min(min, niftiDataArray[batchCount][i]); - max = std::max(max, niftiDataArray[batchCount][i]); - } - Rpp32f multiplier = 255.0f / (max - min); - for (int i = 0; i < dataSize; i++) - niftiDataU8[i] = (uchar)((niftiDataArray[batchCount][i] - min) * multiplier); - - uchar *niftiDataU8Temp = niftiDataU8; - for (int zPlane = roiGenericSrcPtr[batchCount].xyzwhdROI.xyz.z; zPlane < roiGenericSrcPtr[batchCount].xyzwhdROI.xyz.z + roiGenericSrcPtr[batchCount].xyzwhdROI.roiDepth; zPlane++) - { - write_image_from_nifti_opencv(niftiDataU8Temp, niftiHeaderTemp[batchCount].dim[1], (RpptRoiXyzwhd *)roiGenericSrcPtr, outputBufferOpenCV, zPlane, i, batchCount, dstPath, testCaseName, index); - niftiDataU8Temp += xyFrameSize; - } - - write_nifti_file(&niftiHeaderTemp[batchCount], niftiDataArray[batchCount], index, i, dstPath, testCaseName); - - if(i == 0) - { - std::string command = "convert -delay 10 -loop 0 " + std::string(dstPath) + "/" + testCaseName + "_nifti_" + std::to_string(index) + "_zPlane_chn_0_*.jpg " + std::string(dstPath) + "/" + testCaseName + "_niftiOutput_" + std::to_string(index) + "_chn_" + std::to_string(i) + ".gif"; - system(command.c_str()); - } - if(i == 1) - { - std::string command = "convert -delay 10 -loop 0 " + std::string(dstPath) + "/" + testCaseName + "_nifti_" + std::to_string(index) + "_zPlane_chn_1_*.jpg " + std::string(dstPath) + "/" + testCaseName + "_niftiOutput_" + std::to_string(index) + "_chn_" + std::to_string(i) + ".gif"; - system(command.c_str()); - } - if(i == 2) - { - std::string command = "convert -delay 10 -loop 0 " + std::string(dstPath) + "/" + testCaseName + "_nifti_" + std::to_string(index) + "_zPlane_chn_2_*.jpg " + std::string(dstPath) + "/" + testCaseName + "_niftiOutput_" + std::to_string(index) + "_chn_" + std::to_string(i) + ".gif"; - system(command.c_str()); - } - free(niftiDataU8); - free(outputBufferOpenCV); + std::string command = "convert -delay 10 -loop 0 " + std::string(dstPath) + "/" + testCaseName + "_nifti_" + std::to_string(index) + "_zPlane_chn_2_*.jpg " + std::string(dstPath) + "/" + testCaseName + "_niftiOutput_" + std::to_string(index) + "_chn_" + std::to_string(i) + ".gif"; + system(command.c_str()); } + free(niftiDataU8); + free(outputBufferOpenCV); } } } diff --git a/utilities/test_suite/HIP/runTests.py b/utilities/test_suite/HIP/runTests.py index 2e8054332..e5be154c4 100644 --- a/utilities/test_suite/HIP/runTests.py +++ b/utilities/test_suite/HIP/runTests.py @@ -21,13 +21,11 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ - import os -import subprocess # nosec -import argparse import sys -import datetime -import shutil +sys.dont_write_bytecode = True +sys.path.append(os.path.join(os.path.dirname( __file__ ), '..' 
)) +from common import * # Set the timestamp timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") @@ -39,109 +37,8 @@ qaInputFile = scriptPath + "/../TEST_IMAGES/three_images_mixed_src1" outFolderPath = os.getcwd() buildFolderPath = os.getcwd() - -# Checks if the folder path is empty, or is it a root folder, or if it exists, and remove its contents -def validate_and_remove_files(path): - if not path: # check if a string is empty - print("Folder path is empty.") - exit() - - elif path == "/*": # check if the root directory is passed to the function - print("Root folder cannot be deleted.") - exit() - - elif os.path.exists(path): # check if the folder exists - # Get a list of files and directories within the specified path - items = os.listdir(path) - - if items: - # The directory is not empty, delete its contents - for item in items: - item_path = os.path.join(path, item) - if os.path.isfile(item_path): - os.remove(item_path) - elif os.path.isdir(item_path): - shutil.rmtree(item_path) # Delete the directory if it exists - - else: - print("Path is invalid or does not exist.") - exit() - -# Check if the folder is the root folder or exists, and remove the specified subfolders -def validate_and_remove_folders(path, folder): - if path == "/*": # check if the root directory is passed to the function - print("Root folder cannot be deleted.") - exit() - if path and os.path.isdir(path): # checks if directory string is not empty and it exists - output_folders = [folder_name for folder_name in os.listdir(path) if folder_name.startswith(folder)] - - # Loop through each directory and delete it only if it exists - for folder_name in output_folders: - folder_path = os.path.join(path, folder_name) - if os.path.isdir(folder_path): - shutil.rmtree(folder_path) # Delete the directory if it exists - print("Deleted directory:", folder_path) - else: - print("Directory not found:", folder_path) - -# Check if a case file exists and filter its contents based on certain conditions -def case_file_check(CASE_FILE_PATH): - try: - case_file = open(CASE_FILE_PATH,'r') - for line in case_file: - print(line) - if not(line.startswith('"Name"')): - if TYPE in TENSOR_TYPE_LIST: - new_file.write(line) - d_counter[TYPE] = d_counter[TYPE] + 1 - case_file.close() - return True - except IOError: - print("Unable to open case results") - return False - - # Generate a directory name based on certain parameters -def directory_name_generator(qaMode, affinity, layoutType, case, path): - if qaMode == 0: - functionality_group = func_group_finder(int(case)) - dst_folder_temp = f"{path}/rpp_{affinity}_{layoutType}_{functionality_group}" - else: - dst_folder_temp = path - - return dst_folder_temp - -# Process the layout based on the given parameters and generate the directory name and log file layout. 
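Note on the helpers deleted in this hunk (validate_and_remove_files, validate_and_remove_folders, case_file_check, directory_name_generator, process_layout, validate_path, create_layout_directories, generate_performance_reports): they are now expected to come from the shared common module pulled in above via "from common import *". The new call sites later in this patch pass two extra arguments, a backend string ("hip") and the local func_group_finder, so the consolidated process_layout plausibly looks like the sketch below. This is only an illustration reconstructed from the removed code and the new call signature; the module path and the exact body are assumptions, not part of this patch.

# Hypothetical sketch of the consolidated helpers in the shared common module
# (e.g. utilities/test_suite/common.py); reconstructed from the per-backend
# copies removed in this patch and the new process_layout(...) call sites.

def directory_name_generator(qaMode, affinity, layoutType, case, path, func_group_finder):
    # QA mode writes everything to one folder; otherwise group output by functionality.
    if qaMode == 0:
        functionality_group = func_group_finder(int(case))
        return "{}/rpp_{}_{}_{}".format(path, affinity, layoutType, functionality_group)
    return path

def process_layout(layout, qaMode, case, dstPath, backend, func_group_finder):
    # Map the numeric layout index to its folder suffix and log-file tag.
    layoutMap = {0: "pkd3", 1: "pln3", 2: "pln1"}
    logFileLayout = layoutMap[layout]
    dstPathTemp = directory_name_generator(qaMode, backend, logFileLayout, case, dstPath, func_group_finder)
    return dstPathTemp, logFileLayout

Usage then matches the call sites in this patch, e.g. process_layout(layout, qaMode, case, dstPath, "hip", func_group_finder).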
-def process_layout(layout, qaMode, case, dstPath): - if layout == 0: - dstPathTemp = directory_name_generator(qaMode, "hip", "pkd3", case, dstPath) - log_file_layout = "pkd3" - elif layout == 1: - dstPathTemp = directory_name_generator(qaMode, "hip", "pln3", case, dstPath) - log_file_layout = "pln3" - elif layout == 2: - dstPathTemp = directory_name_generator(qaMode, "hip", "pln1", case, dstPath) - log_file_layout = "pln1" - - return dstPathTemp, log_file_layout - -# Validate if a path exists and is a directory -def validate_path(input_path): - if not os.path.exists(input_path): - raise ValueError("path " + input_path +" does not exist.") - if not os.path.isdir(input_path): - raise ValueError("path " + input_path + " is not a directory.") - -# Create layout directories within a destination path based on a layout dictionary -def create_layout_directories(dst_path, layout_dict): - for layout in range(3): - current_layout = layout_dict[layout] - try: - os.makedirs(dst_path + '/' + current_layout) - except FileExistsError: - pass - folder_list = [f for f in os.listdir(dst_path) if current_layout.lower() in f] - for folder in folder_list: - os.rename(dst_path + '/' + folder, dst_path + '/' + current_layout + '/' + folder) +caseMin = 0 +caseMax = 89 # Get a list of log files based on a flag for preserving output def get_log_file_list(preserveOutput): @@ -174,32 +71,20 @@ def func_group_finder(case_number): else: return "miscellaneous" -# Generate performance reports based on counters and a list of types -def generate_performance_reports(d_counter, TYPE_LIST): - import pandas as pd - pd.options.display.max_rows = None - # Generate performance report - for TYPE in TYPE_LIST: - print("\n\n\nKernels tested - ", d_counter[TYPE], "\n\n") - df = pd.read_csv(RESULTS_DIR + "/consolidated_results_" + TYPE + ".stats.csv") - df["AverageMs"] = df["AverageNs"] / 1000000 - dfPrint = df.drop(['Percentage'], axis = 1) - dfPrint["HIP Kernel Name"] = dfPrint.iloc[:,0].str.lstrip("Hip_") - dfPrint_noIndices = dfPrint.astype(str) - dfPrint_noIndices.replace(['0', '0.0'], '', inplace = True) - dfPrint_noIndices = dfPrint_noIndices.to_string(index = False) - print(dfPrint_noIndices) - def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layout, qaMode, decoderType, batchSize, roiList): print("\n\n\n\n") print("--------------------------------") print("Running a New Functionality...") print("--------------------------------") - - for bitDepth in range(7): + bitDepths = range(7) + outputFormatToggles = [0, 1] + if qaMode: + bitDepths = [0] + outputFormatToggles = [0] + for bitDepth in bitDepths: print("\n\n\nRunning New Bit Depth...\n-------------------------\n\n") - for outputFormatToggle in range(2): + for outputFormatToggle in outputFormatToggles: # There is no layout toggle for PLN1 case, so skip this case if layout == 2 and outputFormatToggle == 1: continue @@ -315,11 +200,11 @@ def rpp_test_suite_parser_and_validator(): parser = argparse.ArgumentParser() parser.add_argument("--input_path1", type = str, default = inFilePath1, help = "Path to the input folder 1") parser.add_argument("--input_path2", type = str, default = inFilePath2, help = "Path to the input folder 2") - parser.add_argument("--case_start", type = int, default = 0, help="Testing range starting case # - (0:90)") - parser.add_argument("--case_end", type = int, default = 90, help="Testing range ending case # - (0:90)") - parser.add_argument('--test_type', type = int, default = 0, help="Type of Test - (0 = Unit tests / 1 = 
Performance tests)") - parser.add_argument('--case_list', nargs = "+", help="List of case numbers to list", required=False) - parser.add_argument('--profiling', type = str , default='NO', help='Run with profiler? - (YES/NO)', required=False) + parser.add_argument("--case_start", type = int, default = caseMin, help = "Testing start case # - Range must be in [" + str(caseMin) + ":" + str(caseMax) + "]") + parser.add_argument("--case_end", type = int, default = caseMax, help = "Testing end case # - Range must be in [" + str(caseMin) + ":" + str(caseMax) + "]") + parser.add_argument('--test_type', type = int, default = 0, help = "Type of Test - (0 = Unit tests / 1 = Performance tests)") + parser.add_argument('--case_list', nargs = "+", help = "List of case numbers to list", required = False) + parser.add_argument('--profiling', type = str , default = 'NO', help = 'Run with profiler? - (YES/NO)', required = False) parser.add_argument('--qa_mode', type = int, default = 0, help = "Run with qa_mode? Output images from tests will be compared with golden outputs - (0 / 1)", required = False) parser.add_argument('--decoder_type', type = int, default = 0, help = "Type of Decoder to decode the input data - (0 = TurboJPEG / 1 = OpenCV)") parser.add_argument('--num_runs', type = int, default = 1, help = "Specifies the number of runs for running the performance tests") @@ -334,8 +219,8 @@ def rpp_test_suite_parser_and_validator(): validate_path(qaInputFile) # validate the parameters passed by user - if ((args.case_start < 0 or args.case_start > 90) or (args.case_end < 0 or args.case_end > 90)): - print("Starting case# and Ending case# must be in the 0:90 range. Aborting!") + if ((args.case_start < caseMin or args.case_start > caseMax) or (args.case_end < caseMin or args.case_end > caseMax)): + print(f"Starting case# and Ending case# must be in the {caseMin}:{caseMax} range. Aborting!") exit(0) elif args.case_end < args.case_start: print("Ending case# must be greater than starting case#. Aborting!") @@ -349,7 +234,7 @@ def rpp_test_suite_parser_and_validator(): elif args.decoder_type < 0 or args.decoder_type > 1: print("Decoder Type must be in the 0/1 (0 = OpenCV / 1 = TurboJPEG). Aborting") exit(0) - elif args.case_list is not None and args.case_start > 0 and args.case_end < 90: + elif args.case_list is not None and args.case_start > caseMin and args.case_end < caseMax: print("Invalid input! Please provide only 1 option between case_list, case_start and case_end") exit(0) elif args.num_runs <= 0: @@ -376,9 +261,9 @@ def rpp_test_suite_parser_and_validator(): args.case_list = [str(x) for x in args.case_list] else: for case in args.case_list: - if int(case) < 0 or int(case) > 90: - print("The case# must be in the 0:90 range!") - exit(0) + if int(case) < caseMin or int(case) > caseMax: + print(f"Invalid case number {case}! Case number must be in the {caseMin}:{caseMax} range. Aborting!") + exit(0) return args @@ -410,7 +295,7 @@ def rpp_test_suite_parser_and_validator(): numRuns = 1 elif(testType == 1): if "--num_runs" not in sys.argv: - numRuns = 1000 #default numRuns for running performance tests + numRuns = 100 #default numRuns for running performance tests outFilePath = outFolderPath + "/OUTPUT_PERFORMANCE_LOGS_HIP_" + timestamp else: print("Invalid TEST_TYPE specified. 
TEST_TYPE should be 0/1 (0 = Unittests / 1 = Performancetests)") @@ -438,6 +323,9 @@ def rpp_test_suite_parser_and_validator(): subprocess.run(["cmake", scriptPath], cwd=".") # nosec subprocess.run(["make", "-j16"], cwd=".") # nosec +# List of cases supported +supportedCaseList = ['0', '1', '2', '4', '8', '13', '20', '21', '23', '29', '30', '31', '34', '36', '37', '38', '39', '45', '54', '61', '63', '70', '80', '82', '83', '84', '85', '86', '87', '88', '89'] + # Create folders based on testType and profilingOption if testType == 1 and profilingOption == "YES": os.makedirs(f"{dstPath}/Tensor_PKD3") @@ -451,6 +339,8 @@ def rpp_test_suite_parser_and_validator(): if(testType == 0): for case in caseList: + if case not in supportedCaseList: + continue if case == "82" and (("--input_path1" not in sys.argv and "--input_path2" not in sys.argv) or qaMode == 1): srcPath1 = ricapInFilePath srcPath2 = ricapInFilePath @@ -458,11 +348,8 @@ def rpp_test_suite_parser_and_validator(): if qaMode == 1 and case != "82": srcPath1 = inFilePath1 srcPath2 = inFilePath2 - if int(case) < 0 or int(case) > 89: - print(f"Invalid case number {case}. Case number must be in the range of 0 to 89!") - continue for layout in range(3): - dstPathTemp, log_file_layout = process_layout(layout, qaMode, case, dstPath) + dstPathTemp, log_file_layout = process_layout(layout, qaMode, case, dstPath, "hip", func_group_finder) if qaMode == 0: if not os.path.isdir(dstPathTemp): @@ -476,14 +363,13 @@ def rpp_test_suite_parser_and_validator(): else: if (testType == 1 and profilingOption == "NO"): for case in caseList: - if int(case) < 0 or int(case) > 89: - print(f"Invalid case number {case}. Case number must be in the range of 0 to 89!") + if case not in supportedCaseList: continue if case == "82" and "--input_path1" not in sys.argv and "--input_path2" not in sys.argv: srcPath1 = ricapInFilePath srcPath2 = ricapInFilePath for layout in range(3): - dstPathTemp, log_file_layout = process_layout(layout, qaMode, case, dstPath) + dstPathTemp, log_file_layout = process_layout(layout, qaMode, case, dstPath, "hip", func_group_finder) run_performance_test(loggingFolder, log_file_layout, srcPath1, srcPath2, dstPath, case, numRuns, testType, layout, qaMode, decoderType, batchSize, roiList) @@ -491,14 +377,13 @@ def rpp_test_suite_parser_and_validator(): NEW_FUNC_GROUP_LIST = [0, 15, 20, 29, 36, 40, 42, 49, 56, 65, 69] for case in caseList: - if int(case) < 0 or int(case) > 89: - print(f"Invalid case number {case}. 
Case number must be in the range of 0 to 89!") + if case not in supportedCaseList: continue if case == "82" and "--input_path1" not in sys.argv and "--input_path2" not in sys.argv: srcPath1 = ricapInFilePath srcPath2 = ricapInFilePath for layout in range(3): - dstPathTemp, log_file_layout = process_layout(layout, qaMode, case, dstPath) + dstPathTemp, log_file_layout = process_layout(layout, qaMode, case, dstPath, "hip", func_group_finder) print("\n\n\n\n") print("--------------------------------") @@ -575,7 +460,7 @@ def rpp_test_suite_parser_and_validator(): # Write into csv file CASE_FILE_PATH = CASE_RESULTS_DIR + "/output_case" + str(CASE_NUM) + "_bitDepth" + str(BIT_DEPTH) + "_oft" + str(OFT) + "_kernelSize" + str(KSIZE) + ".stats.csv" print("CASE_FILE_PATH = " + CASE_FILE_PATH) - fileCheck = case_file_check(CASE_FILE_PATH) + fileCheck = case_file_check(CASE_FILE_PATH, TYPE, TENSOR_TYPE_LIST, new_file, d_counter) if fileCheck == False: continue elif (CASE_NUM == "24" or CASE_NUM == "21" or CASE_NUM == "23") and TYPE.startswith("Tensor"): @@ -585,7 +470,7 @@ def rpp_test_suite_parser_and_validator(): # Write into csv file CASE_FILE_PATH = CASE_RESULTS_DIR + "/output_case" + str(CASE_NUM) + "_bitDepth" + str(BIT_DEPTH) + "_oft" + str(OFT) + "_interpolationType" + str(INTERPOLATIONTYPE) + ".stats.csv" print("CASE_FILE_PATH = " + CASE_FILE_PATH) - fileCheck = case_file_check(CASE_FILE_PATH) + fileCheck = case_file_check(CASE_FILE_PATH, TYPE, TENSOR_TYPE_LIST, new_file, d_counter) if fileCheck == False: continue elif (CASE_NUM == "8") and TYPE.startswith("Tensor"): @@ -595,21 +480,21 @@ def rpp_test_suite_parser_and_validator(): # Write into csv file CASE_FILE_PATH = CASE_RESULTS_DIR + "/output_case" + str(CASE_NUM) + "_bitDepth" + str(BIT_DEPTH) + "_oft" + str(OFT) + "_noiseType" + str(NOISETYPE) + ".stats.csv" print("CASE_FILE_PATH = " + CASE_FILE_PATH) - fileCheck = case_file_check(CASE_FILE_PATH) + fileCheck = case_file_check(CASE_FILE_PATH, TYPE, TENSOR_TYPE_LIST, new_file, d_counter) if fileCheck == False: continue else: # Write into csv file CASE_FILE_PATH = CASE_RESULTS_DIR + "/output_case" + str(CASE_NUM) + "_bitDepth" + str(BIT_DEPTH) + "_oft" + str(OFT) + ".stats.csv" print("CASE_FILE_PATH = " + CASE_FILE_PATH) - fileCheck = case_file_check(CASE_FILE_PATH) + fileCheck = case_file_check(CASE_FILE_PATH, TYPE, TENSOR_TYPE_LIST, new_file, d_counter) if fileCheck == False: continue new_file.close() subprocess.call(['chown', '{}:{}'.format(os.getuid(), os.getgid()), RESULTS_DIR + "/consolidated_results_" + TYPE + ".stats.csv"]) # nosec try: - generate_performance_reports(d_counter, TYPE_LIST) + generate_performance_reports(d_counter, TYPE_LIST, RESULTS_DIR) except ImportError: print("\nPandas not available! 
Results of GPU profiling experiment are available in the following files:\n" + \ @@ -634,91 +519,14 @@ def rpp_test_suite_parser_and_validator(): "statistical_operations" ] for log_file in log_file_list: - # Opening log file - try: - f = open(log_file,"r") - print("\n\n\nOpened log file -> " + log_file) - except IOError: - print("Skipping file -> " + log_file) - continue - - stats = [] - maxVals = [] - minVals = [] - avgVals = [] - functions = [] - frames = [] - prevLine = "" - funcCount = 0 - - # Loop over each line - for line in f: - for functionality_group in functionality_group_list: - if functionality_group in line: - functions.extend([" ", functionality_group, " "]) - frames.extend([" ", " ", " "]) - maxVals.extend([" ", " ", " "]) - minVals.extend([" ", " ", " "]) - avgVals.extend([" ", " ", " "]) - - if "max,min,avg wall times in ms/batch" in line: - split_word_start = "Running " - split_word_end = " "+ str(numRuns) - prevLine = prevLine.partition(split_word_start)[2].partition(split_word_end)[0] - if prevLine not in functions: - functions.append(prevLine) - frames.append(str(numRuns)) - split_word_start = "max,min,avg wall times in ms/batch = " - split_word_end = "\n" - stats = line.partition(split_word_start)[2].partition(split_word_end)[0].split(",") - maxVals.append(stats[0]) - minVals.append(stats[1]) - avgVals.append(stats[2]) - funcCount += 1 - - if line != "\n": - prevLine = line - - # Print log lengths - print("Functionalities - " + str(funcCount)) - - # Print summary of log - print("\n\nFunctionality\t\t\t\t\t\tFrames Count\tmax(ms/batch)\t\tmin(ms/batch)\t\tavg(ms/batch)\n") - if len(functions) != 0: - maxCharLength = len(max(functions, key = len)) - functions = [x + (' ' * (maxCharLength - len(x))) for x in functions] - for i, func in enumerate(functions): - print(func + "\t" + str(frames[i]) + "\t\t" + str(maxVals[i]) + "\t" + str(minVals[i]) + "\t" + str(avgVals[i])) - else: - print("No variants under this category") - - # Closing log file - f.close() + print_performance_tests_summary(log_file, functionality_group_list, numRuns) # print the results of qa tests -supportedCaseList = ['0', '1', '2', '4', '8', '13', '20', '21', '23', '29', '30', '31', '34', '36', '37', '38', '39', '45', '54', '61', '63', '70', '80', '82', '83', '84', '85', '86', '87', '88', '89'] nonQACaseList = ['8', '24', '54', '84'] # Add cases present in supportedCaseList, but without QA support if qaMode and testType == 0: qaFilePath = os.path.join(outFilePath, "QA_results.txt") checkFile = os.path.isfile(qaFilePath) if checkFile: - f = open(qaFilePath, 'r+') print("---------------------------------- Results of QA Test - Tensor_hip ----------------------------------\n") - numLines = 0 - numPassed = 0 - for line in f: - sys.stdout.write(line) - numLines += 1 - if "PASSED" in line: - numPassed += 1 - sys.stdout.flush() - resultsInfo = "\n\nFinal Results of Tests:" - resultsInfo += "\n - Total test cases including all subvariants REQUESTED = " + str(numLines) - resultsInfo += "\n - Total test cases including all subvariants PASSED = " + str(numPassed) - resultsInfo += "\n\nGeneral information on Tensor test suite availability:" - resultsInfo += "\n - Total augmentations supported in Tensor test suite = " + str(len(supportedCaseList)) - resultsInfo += "\n - Total augmentations with golden output QA test support = " + str(len(supportedCaseList) - len(nonQACaseList)) - resultsInfo += "\n - Total augmentations without golden ouput QA test support (due to randomization involved) = " + str(len(nonQACaseList)) 
- f.write(resultsInfo) - print("\n-------------------------------------------------------------------" + resultsInfo + "\n\n-------------------------------------------------------------------") + print_qa_tests_summary(qaFilePath, supportedCaseList, nonQACaseList) diff --git a/utilities/test_suite/HIP/runTests_voxel.py b/utilities/test_suite/HIP/runTests_voxel.py index d645b03fe..4ae869b75 100644 --- a/utilities/test_suite/HIP/runTests_voxel.py +++ b/utilities/test_suite/HIP/runTests_voxel.py @@ -23,11 +23,10 @@ """ import os -import subprocess # nosec -import argparse import sys -import datetime -import shutil +sys.dont_write_bytecode = True +sys.path.append(os.path.join(os.path.dirname( __file__ ), '..' )) +from common import * # Set the timestamp timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") @@ -41,105 +40,6 @@ caseMin = 0 caseMax = 5 -# Check if folder path is empty, if it is the root folder, or if it exists, and remove its contents -def validate_and_remove_contents(path): - if not path: # check if a string is empty - print("Folder path is empty.") - exit() - elif path == "/*": # check if the root directory is passed to the function - print("Root folder cannot be deleted.") - exit() - elif os.path.exists(path): # check if the folder exists - # Get a list of files and directories within the specified path - items = os.listdir(path) - - if items: - # The directory is not empty, delete its contents - for item in items: - item_path = os.path.join(path, item) - if os.path.isfile(item_path): - os.remove(item_path) - elif os.path.isdir(item_path): - shutil.rmtree(item_path) # Delete the directory if it exists - else: - print("Path is invalid or does not exist.") - exit() - -# Check if the folder is the root folder or exists, and remove the specified subfolders -def validate_and_remove_folders(path, folder): - if path == "/*": # check if the root directory is passed to the function - print("Root folder cannot be deleted.") - exit() - if path and os.path.isdir(path): # checks if directory string is not empty and it exists - output_folders = [folder_name for folder_name in os.listdir(path) if folder_name.startswith(folder)] - - # Loop through each directory and delete it only if it exists - for folder_name in output_folders: - folder_path = os.path.join(path, folder_name) - if os.path.isdir(folder_path): - shutil.rmtree(folder_path) # Delete the directory if it exists - print("Deleted directory:", folder_path) - else: - print("Directory not found:", folder_path) - -def case_file_check(CASE_FILE_PATH, TYPE, TENSOR_TYPE_LIST, new_file): - try: - case_file = open(CASE_FILE_PATH,'r') - for line in case_file: - print(line) - if not(line.startswith('"Name"')): - if TYPE in TENSOR_TYPE_LIST: - new_file.write(line) - d_counter[TYPE] = d_counter[TYPE] + 1 - case_file.close() - return True - except IOError: - print("Unable to open case results") - return False - -# Generate a directory name based on certain parameters -def directory_name_generator(qaMode, affinity, layoutType, case, path): - if qaMode == 0: - functionality_group = func_group_finder(int(case)) - dst_folder_temp = "{}/rpp_{}_{}_{}".format(path, affinity, layoutType, functionality_group) - else: - dst_folder_temp = path - - return dst_folder_temp - -# Process the layout based on the given parameters and generate the directory name and log file layout. 
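The QA-summary block removed from runTests.py above (and from runTests_voxel.py below) is replaced by a single print_qa_tests_summary(qaFilePath, supportedCaseList, nonQACaseList) call. Only the call signature is visible in this patch, so the following is a sketch of what the shared helper presumably does, reconstructed from the deleted lines; the exact body in the common module may differ.

# Hypothetical sketch of print_qa_tests_summary in the shared common module,
# reconstructed from the QA-summary code this patch deletes from both runners.
import sys

def print_qa_tests_summary(qaFilePath, supportedCaseList, nonQACaseList):
    numLines = 0
    numPassed = 0
    with open(qaFilePath, 'r+') as f:
        for line in f:
            sys.stdout.write(line)      # echo every QA result line
            numLines += 1
            if "PASSED" in line:
                numPassed += 1
        sys.stdout.flush()
        resultsInfo = "\n\nFinal Results of Tests:"
        resultsInfo += "\n    - Total test cases including all subvariants REQUESTED = " + str(numLines)
        resultsInfo += "\n    - Total test cases including all subvariants PASSED = " + str(numPassed)
        resultsInfo += "\n\nGeneral information on Tensor test suite availability:"
        resultsInfo += "\n    - Total augmentations supported in Tensor test suite = " + str(len(supportedCaseList))
        resultsInfo += "\n    - Total augmentations with golden output QA test support = " + str(len(supportedCaseList) - len(nonQACaseList))
        resultsInfo += "\n    - Total augmentations without golden output QA test support (due to randomization involved) = " + str(len(nonQACaseList))
        f.write(resultsInfo)            # append the summary back into QA_results.txt
    print("\n-------------------------------------------------------------------" + resultsInfo + "\n\n-------------------------------------------------------------------")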
-def process_layout(layout, qaMode, case, dstPath): - if layout == 0: - dstPathTemp = directory_name_generator(qaMode, "hip", "pkd3", case, dstPath) - logFileLayout = "pkd3" - elif layout == 1: - dstPathTemp = directory_name_generator(qaMode, "hip", "pln3", case, dstPath) - logFileLayout = "pln3" - elif layout == 2: - dstPathTemp = directory_name_generator(qaMode, "hip", "pln1", case, dstPath) - logFileLayout = "pln1" - - return dstPathTemp, logFileLayout - -# Validate if a path exists and is a directory -def validate_path(input_path): - if not os.path.exists(input_path): - raise ValueError("path " + input_path +" does not exist.") - if not os.path.isdir(input_path): - raise ValueError("path " + input_path + " is not a directory.") - -# Create layout directories within a destination path based on a layout dictionary -def create_layout_directories(dst_path, layout_dict): - for layout in range(3): - current_layout = layout_dict[layout] - try: - os.makedirs(dst_path + '/' + current_layout) - except FileExistsError: - pass - folder_list = [f for f in os.listdir(dst_path) if current_layout.lower() in f] - for folder in folder_list: - os.rename(dst_path + '/' + folder, dst_path + '/' + current_layout + '/' + folder) - def get_log_file_list(preserveOutput): return [ outFolderPath + "/OUTPUT_PERFORMANCE_LOGS_HIP_VOXEL_" + timestamp + "/Tensor_voxel_hip_pkd3_raw_performance_log.txt", @@ -156,28 +56,14 @@ def func_group_finder(case_number): else: return "miscellaneous" -# Generate performance reports based on counters and a list of types -def generate_performance_reports(d_counter, TYPE_LIST): - import pandas as pd - pd.options.display.max_rows = None - # Generate performance report - for TYPE in TYPE_LIST: - print("\n\n\nKernels tested - ", d_counter[TYPE], "\n\n") - df = pd.read_csv(RESULTS_DIR + "/consolidated_results_" + TYPE + ".stats.csv") - df["AverageMs"] = df["AverageNs"] / 1000000 - dfPrint = df.drop(['Percentage'], axis = 1) - dfPrint["HIP Kernel Name"] = dfPrint.iloc[:,0].str.lstrip("Hip_") - dfPrint_noIndices = dfPrint.astype(str) - dfPrint_noIndices.replace(['0', '0.0'], '', inplace = True) - dfPrint_noIndices = dfPrint_noIndices.to_string(index = False) - print(dfPrint_noIndices) - def run_unit_test(headerPath, dataPath, dstPathTemp, layout, case, numRuns, testType, qaMode, batchSize): print("\n\n\n\n") print("--------------------------------") print("Running a New Functionality...") print("--------------------------------") bitDepths = [0, 2] + if qaMode: + bitDepths = [2] for bitDepth in bitDepths: print("\n\n\nRunning New Bit Depth...\n-------------------------\n\n") print(f"./Tensor_voxel_hip {headerPath} {dataPath} {dstPathTemp} {layout} {case} {numRuns} {testType} {qaMode} {batchSize} {bitDepth}") @@ -365,7 +251,7 @@ def rpp_test_suite_parser_and_validator(): dstPath = outFilePath # Validate DST_FOLDER -validate_and_remove_contents(dstPath) +validate_and_remove_files(dstPath) # Enable extglob if os.path.exists(buildFolderPath + "/build"): @@ -377,6 +263,9 @@ def rpp_test_suite_parser_and_validator(): subprocess.run(["cmake", scriptPath], cwd=".") # nosec subprocess.run(["make", "-j16"], cwd=".") # nosec +# List of cases supported +supportedCaseList = ['0', '1', '2', '3', '5'] + # Create folders based on testType and profilingOption if testType == 1 and profilingOption == "YES": os.makedirs(f"{dstPath}/Tensor_PKD3") @@ -390,8 +279,10 @@ def rpp_test_suite_parser_and_validator(): if testType == 0: for case in caseList: + if case not in supportedCaseList: + continue for layout in 
range(3): - dstPathTemp, logFileLayout = process_layout(layout, qaMode, case, dstPath) + dstPathTemp, logFileLayout = process_layout(layout, qaMode, case, dstPath, "hip", func_group_finder) if qaMode == 0: if not os.path.isdir(dstPathTemp): os.mkdir(dstPathTemp) @@ -399,14 +290,18 @@ def rpp_test_suite_parser_and_validator(): run_unit_test(headerPath, dataPath, dstPathTemp, layout, case, numRuns, testType, qaMode, batchSize) elif (testType == 1 and profilingOption == "NO"): for case in caseList: + if case not in supportedCaseList: + continue for layout in range(3): - dstPathTemp, logFileLayout = process_layout(layout, qaMode, case, dstPath) + dstPathTemp, logFileLayout = process_layout(layout, qaMode, case, dstPath, "hip", func_group_finder) run_performance_test(loggingFolder, logFileLayout, headerPath, dataPath, dstPathTemp, layout, case, numRuns, testType, qaMode, batchSize) elif (testType == 1 and profilingOption == "YES"): NEW_FUNC_GROUP_LIST = [0, 1] for case in caseList: + if case not in supportedCaseList: + continue for layout in range(3): - dstPathTemp, logFileLayout = process_layout(layout, qaMode, case, dstPath) + dstPathTemp, logFileLayout = process_layout(layout, qaMode, case, dstPath, "hip", func_group_finder) run_performance_test_with_profiler(loggingFolder, logFileLayout, dstPath, headerPath, dataPath, dstPathTemp, layout, case, numRuns, testType, qaMode, batchSize) RESULTS_DIR = "" @@ -451,14 +346,14 @@ def rpp_test_suite_parser_and_validator(): # Write into csv file CASE_FILE_PATH = CASE_RESULTS_DIR + "/output_case" + str(CASE_NUM) + ".stats.csv" print("CASE_FILE_PATH = " + CASE_FILE_PATH) - fileCheck = case_file_check(CASE_FILE_PATH, TYPE, TENSOR_TYPE_LIST, new_file) + fileCheck = case_file_check(CASE_FILE_PATH, TYPE, TENSOR_TYPE_LIST, new_file, d_counter) if fileCheck == False: continue new_file.close() subprocess.call(['chown', '{}:{}'.format(os.getuid(), os.getgid()), RESULTS_DIR + "/consolidated_results_" + TYPE + ".stats.csv"]) # nosec try: - generate_performance_reports(d_counter, TYPE_LIST) + generate_performance_reports(d_counter, TYPE_LIST, RESULTS_DIR) except ImportError: print("\nPandas not available! 
Results of GPU profiling experiment are available in the following files:\n" + \ @@ -470,32 +365,14 @@ def rpp_test_suite_parser_and_validator(): print("Unable to open results in " + RESULTS_DIR + "/consolidated_results_" + TYPE + ".stats.csv") # print the results of qa tests -supportedCaseList = ['0', '1', '2', '3', '5'] nonQACaseList = [] # Add cases present in supportedCaseList, but without QA support if qaMode and testType == 0: qaFilePath = os.path.join(outFilePath, "QA_results.txt") checkFile = os.path.isfile(qaFilePath) if checkFile: - f = open(qaFilePath, 'r+') print("---------------------------------- Results of QA Test - Tensor_voxel_hip ----------------------------------\n") - numLines = 0 - numPassed = 0 - for line in f: - sys.stdout.write(line) - numLines += 1 - if "PASSED" in line: - numPassed += 1 - sys.stdout.flush() - resultsInfo = "\n\nFinal Results of Tests:" - resultsInfo += "\n - Total test cases including all subvariants REQUESTED = " + str(numLines) - resultsInfo += "\n - Total test cases including all subvariants PASSED = " + str(numPassed) - resultsInfo += "\n\nGeneral information on Tensor voxel test suite availability:" - resultsInfo += "\n - Total augmentations supported in Tensor test suite = " + str(len(supportedCaseList)) - resultsInfo += "\n - Total augmentations with golden output QA test support = " + str(len(supportedCaseList) - len(nonQACaseList)) - resultsInfo += "\n - Total augmentations without golden ouput QA test support (due to randomization involved) = " + str(len(nonQACaseList)) - f.write(resultsInfo) - print("\n-------------------------------------------------------------------" + resultsInfo + "\n\n-------------------------------------------------------------------") + print_qa_tests_summary(qaFilePath, supportedCaseList, nonQACaseList) layoutDict = {0:"PKD3", 1:"PLN3", 2:"PLN1"} if (testType == 0 and qaMode == 0): # Unit tests @@ -509,63 +386,4 @@ def rpp_test_suite_parser_and_validator(): ] for log_file in log_file_list: - # Opening log file - try: - f = open(log_file,"r") - print("\n\n\nOpened log file -> "+ log_file) - except IOError: - print("Skipping file -> "+ log_file) - continue - - stats = [] - maxVals = [] - minVals = [] - avgVals = [] - functions = [] - frames = [] - prevLine = "" - funcCount = 0 - - # Loop over each line - for line in f: - for functionality_group in functionality_group_list: - if functionality_group in line: - functions.extend([" ", functionality_group, " "]) - frames.extend([" ", " ", " "]) - maxVals.extend([" ", " ", " "]) - minVals.extend([" ", " ", " "]) - avgVals.extend([" ", " ", " "]) - - if "max,min,avg wall times in ms/batch" in line: - split_word_start = "Running " - split_word_end = " " +str(numRuns) - prevLine = prevLine.partition(split_word_start)[2].partition(split_word_end)[0] - if prevLine not in functions: - functions.append(prevLine) - frames.append(numRuns) - split_word_start = "max,min,avg wall times in ms/batch = " - split_word_end = "\n" - stats = line.partition(split_word_start)[2].partition(split_word_end)[0].split(",") - maxVals.append(stats[0]) - minVals.append(stats[1]) - avgVals.append(stats[2]) - funcCount += 1 - - if line != "\n": - prevLine = line - - # Print log lengths - print("Functionalities - "+ str(funcCount)) - - # Print summary of log - print("\n\nFunctionality\t\t\t\t\t\tFrames Count\t\tmax(ms/batch)\t\tmin(ms/batch)\t\tavg(ms/batch)\n") - if len(functions) != 0: - maxCharLength = len(max(functions, key = len)) - functions = [x + (' ' * (maxCharLength - len(x))) for x in 
functions] - for i, func in enumerate(functions): - print(func + "\t\t\t\t\t\t\t\t" + str(frames[i]) + "\t\t" + str(maxVals[i]) + "\t\t" + str(minVals[i]) + "\t\t" + str(avgVals[i])) - else: - print("No variants under this category") - - # Closing log file - f.close() \ No newline at end of file + print_performance_tests_summary(log_file, functionality_group_list, numRuns) diff --git a/utilities/test_suite/HOST/Tensor_host.cpp b/utilities/test_suite/HOST/Tensor_host.cpp index b698a2def..e93eb143a 100644 --- a/utilities/test_suite/HOST/Tensor_host.cpp +++ b/utilities/test_suite/HOST/Tensor_host.cpp @@ -347,73 +347,73 @@ int main(int argc, char **argv) // case-wise RPP API and measure time script for Unit and Performance test printf("\nRunning %s %d times (each time with a batch size of %d images) and computing mean statistics...", func.c_str(), numRuns, batchSize); - for (int perfRunCount = 0; perfRunCount < numRuns; perfRunCount++) + for(int iterCount = 0; iterCount < noOfIterations; iterCount++) { - for(int iterCount = 0; iterCount < noOfIterations; iterCount++) - { - vector::const_iterator imagesPathStart = imageNamesPath.begin() + (iterCount * batchSize); - vector::const_iterator imagesPathEnd = imagesPathStart + batchSize; - vector::const_iterator imageNamesStart = imageNames.begin() + (iterCount * batchSize); - vector::const_iterator imageNamesEnd = imageNamesStart + batchSize; - vector::const_iterator imagesPathSecondStart = imageNamesPathSecond.begin() + (iterCount * batchSize); - vector::const_iterator imagesPathSecondEnd = imagesPathSecondStart + batchSize; - - // Set ROIs for src/dst - set_src_and_dst_roi(imagesPathStart, imagesPathEnd, roiTensorPtrSrc, roiTensorPtrDst, dstImgSizes); + vector::const_iterator imagesPathStart = imageNamesPath.begin() + (iterCount * batchSize); + vector::const_iterator imagesPathEnd = imagesPathStart + batchSize; + vector::const_iterator imageNamesStart = imageNames.begin() + (iterCount * batchSize); + vector::const_iterator imageNamesEnd = imageNamesStart + batchSize; + vector::const_iterator imagesPathSecondStart = imageNamesPathSecond.begin() + (iterCount * batchSize); + vector::const_iterator imagesPathSecondEnd = imagesPathSecondStart + batchSize; + + // Set ROIs for src/dst + set_src_and_dst_roi(imagesPathStart, imagesPathEnd, roiTensorPtrSrc, roiTensorPtrDst, dstImgSizes); + + //Read images + if(decoderType == 0) + read_image_batch_turbojpeg(inputu8, srcDescPtr, imagesPathStart); + else + read_image_batch_opencv(inputu8, srcDescPtr, imagesPathStart); + + // if the input layout requested is PLN3, convert PKD3 inputs to PLN3 for first and second input batch + if (layoutType == 1) + convert_pkd3_to_pln3(inputu8, srcDescPtr); - //Read images + if(dualInputCase) + { if(decoderType == 0) - read_image_batch_turbojpeg(inputu8, srcDescPtr, imagesPathStart); + read_image_batch_turbojpeg(inputu8Second, srcDescPtr, imagesPathSecondStart); else - read_image_batch_opencv(inputu8, srcDescPtr, imagesPathStart); - - // if the input layout requested is PLN3, convert PKD3 inputs to PLN3 for first and second input batch + read_image_batch_opencv(inputu8Second, srcDescPtr, imagesPathSecondStart); if (layoutType == 1) - convert_pkd3_to_pln3(inputu8, srcDescPtr); - - if(dualInputCase) - { - if(decoderType == 0) - read_image_batch_turbojpeg(inputu8Second, srcDescPtr, imagesPathSecondStart); - else - read_image_batch_opencv(inputu8Second, srcDescPtr, imagesPathSecondStart); - if (layoutType == 1) - convert_pkd3_to_pln3(inputu8Second, srcDescPtr); - } + 
convert_pkd3_to_pln3(inputu8Second, srcDescPtr); + } - // Convert inputs to correponding bit depth specified by user - convert_input_bitdepth(input, input_second, inputu8, inputu8Second, inputBitDepth, ioBufferSize, inputBufferSize, srcDescPtr, dualInputCase, conversionFactor); + // Convert inputs to correponding bit depth specified by user + convert_input_bitdepth(input, input_second, inputu8, inputu8Second, inputBitDepth, ioBufferSize, inputBufferSize, srcDescPtr, dualInputCase, conversionFactor); - int roiHeightList[batchSize], roiWidthList[batchSize]; - if(roiList[0] == 0 && roiList[1] == 0 && roiList[2] == 0 && roiList[3] == 0) + int roiHeightList[batchSize], roiWidthList[batchSize]; + if(roiList[0] == 0 && roiList[1] == 0 && roiList[2] == 0 && roiList[3] == 0) + { + for(int i = 0; i < batchSize ; i++) { - for(int i = 0; i < batchSize ; i++) - { - roiList[0] = 10; - roiList[1] = 10; - roiWidthList[i] = roiTensorPtrSrc[i].xywhROI.roiWidth / 2; - roiHeightList[i] = roiTensorPtrSrc[i].xywhROI.roiHeight / 2; - } + roiList[0] = 10; + roiList[1] = 10; + roiWidthList[i] = roiTensorPtrSrc[i].xywhROI.roiWidth / 2; + roiHeightList[i] = roiTensorPtrSrc[i].xywhROI.roiHeight / 2; } - else + } + else + { + for(int i = 0; i < batchSize ; i++) { - for(int i = 0; i < batchSize ; i++) - { - roiWidthList[i] = roiList[2]; - roiHeightList[i] = roiList[3]; - } + roiWidthList[i] = roiList[2]; + roiHeightList[i] = roiList[3]; } + } - // Uncomment to run test case with an xywhROI override - // roi.xywhROI = {0, 0, 25, 25}; - // set_roi_values(&roi, roiTensorPtrSrc, roiTypeSrc, batchSize); - // update_dst_sizes_with_roi(roiTensorPtrSrc, dstImgSizes, roiTypeSrc, batchSize); + // Uncomment to run test case with an xywhROI override + // roi.xywhROI = {0, 0, 25, 25}; + // set_roi_values(&roi, roiTensorPtrSrc, roiTypeSrc, batchSize); + // update_dst_sizes_with_roi(roiTensorPtrSrc, dstImgSizes, roiTypeSrc, batchSize); - // Uncomment to run test case with an ltrbROI override - // roiTypeSrc = RpptRoiType::LTRB; - // convert_roi(roiTensorPtrSrc, roiTypeSrc, batchSize); - // update_dst_sizes_with_roi(roiTensorPtrSrc, dstImgSizes, roiTypeSrc, batchSize); + // Uncomment to run test case with an ltrbROI override + // roiTypeSrc = RpptRoiType::LTRB; + // convert_roi(roiTensorPtrSrc, roiTypeSrc, batchSize); + // update_dst_sizes_with_roi(roiTensorPtrSrc, dstImgSizes, roiTypeSrc, batchSize); + for (int perfRunCount = 0; perfRunCount < numRuns; perfRunCount++) + { clock_t startCpuTime, endCpuTime; double startWallTime, endWallTime; switch (testCase) @@ -1111,119 +1111,119 @@ int main(int argc, char **argv) maxWallTime = std::max(maxWallTime, wallTime); minWallTime = std::min(minWallTime, wallTime); avgWallTime += wallTime; - cpuTime *= 1000; - wallTime *= 1000; + } + cpuTime *= 1000; + wallTime *= 1000; - if (testType == 0) - { - cout <<"\n\n"; - cout <<"CPU Backend Clock Time: "<< cpuTime <<" ms/batch"<< endl; - cout <<"CPU Backend Wall Time: "<< wallTime <<" ms/batch"<< endl; + if (testType == 0) + { + cout <<"\n\n"; + cout <<"CPU Backend Clock Time: "<< cpuTime <<" ms/batch"<< endl; + cout <<"CPU Backend Wall Time: "<< wallTime <<" ms/batch"<< endl; - if (reductionTypeCase) + if (reductionTypeCase) + { + if(srcDescPtr->c == 3) + printf("\nReduction result (Batch of 3 channel images produces 4 results per image in batch): "); + else if(srcDescPtr->c == 1) { - if(srcDescPtr->c == 3) - printf("\nReduction result (Batch of 3 channel images produces 4 results per image in batch): "); - else if(srcDescPtr->c == 1) - { - 
printf("\nReduction result (Batch of 1 channel images produces 1 result per image in batch): "); - reductionFuncResultArrLength = srcDescPtr->n; - } - - // print reduction functions output array based on different bit depths, and precision desired - int precision = ((dstDescPtr->dataType == RpptDataType::F32) || (dstDescPtr->dataType == RpptDataType::F16)) ? 3 : 0; - if (dstDescPtr->dataType == RpptDataType::U8) - { - if (testCase == 87) - print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); - else - print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); - } - else if (dstDescPtr->dataType == RpptDataType::F16) - { - if (testCase == 87) - print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); - else - print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); - } - else if (dstDescPtr->dataType == RpptDataType::F32) - { - if (testCase == 87) - print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); - else - print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); - } - else if (dstDescPtr->dataType == RpptDataType::I8) - { - if (testCase == 87) - print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); - else - print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); - } - printf("\n"); + printf("\nReduction result (Batch of 1 channel images produces 1 result per image in batch): "); + reductionFuncResultArrLength = srcDescPtr->n; + } - /*Compare the output of the function with golden outputs only if - 1.QA Flag is set - 2.input bit depth 0 (U8) - 3.source and destination layout are the same*/ - if(qaFlag && inputBitDepth == 0 && (srcDescPtr->layout == dstDescPtr->layout) && !(randomOutputCase)) - { - if (testCase == 87) - compare_reduction_output(static_cast(reductionFuncResultArr), testCaseName, srcDescPtr, testCase, dst, scriptPath); - else - compare_reduction_output(static_cast(reductionFuncResultArr), testCaseName, srcDescPtr, testCase, dst, scriptPath); - } + // print reduction functions output array based on different bit depths, and precision desired + int precision = ((dstDescPtr->dataType == RpptDataType::F32) || (dstDescPtr->dataType == RpptDataType::F16)) ? 
3 : 0; + if (dstDescPtr->dataType == RpptDataType::U8) + { + if (testCase == 87) + print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); + else + print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); } - else + else if (dstDescPtr->dataType == RpptDataType::F16) { - // Reconvert other bit depths to 8u for output display purposes - convert_output_bitdepth_to_u8(output, outputu8, inputBitDepth, oBufferSize, outputBufferSize, dstDescPtr, invConversionFactor); + if (testCase == 87) + print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); + else + print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); + } + else if (dstDescPtr->dataType == RpptDataType::F32) + { + if (testCase == 87) + print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); + else + print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); + } + else if (dstDescPtr->dataType == RpptDataType::I8) + { + if (testCase == 87) + print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); + else + print_array(static_cast(reductionFuncResultArr), reductionFuncResultArrLength, precision); + } + printf("\n"); - // If DEBUG_MODE is set to 1 dump the outputs to csv files for debugging - if(DEBUG_MODE && iterCount == 0) - { - std::ofstream refFile; - refFile.open(func + ".csv"); - for (int i = 0; i < oBufferSize; i++) - refFile << static_cast(*(outputu8 + i)) << ","; - refFile.close(); - } + /*Compare the output of the function with golden outputs only if + 1.QA Flag is set + 2.input bit depth 0 (U8) + 3.source and destination layout are the same*/ + if(qaFlag && inputBitDepth == 0 && (srcDescPtr->layout == dstDescPtr->layout) && !(randomOutputCase)) + { + if (testCase == 87) + compare_reduction_output(static_cast(reductionFuncResultArr), testCaseName, srcDescPtr, testCase, dst, scriptPath); + else + compare_reduction_output(static_cast(reductionFuncResultArr), testCaseName, srcDescPtr, testCase, dst, scriptPath); + } + } + else + { + // Reconvert other bit depths to 8u for output display purposes + convert_output_bitdepth_to_u8(output, outputu8, inputBitDepth, oBufferSize, outputBufferSize, dstDescPtr, invConversionFactor); - /*Compare the output of the function with golden outputs only if - 1.QA Flag is set - 2.input bit depth 0 (Input U8 && Output U8) - 3.source and destination layout are the same - 4.augmentation case does not generate random output*/ - if(qaFlag && inputBitDepth == 0 && ((srcDescPtr->layout == dstDescPtr->layout) || pln1OutTypeCase) && !(randomOutputCase)) - compare_output(outputu8, testCaseName, srcDescPtr, dstDescPtr, dstImgSizes, batchSize, interpolationTypeName, noiseTypeName, testCase, dst, scriptPath); - - // Calculate exact dstROI in XYWH format for OpenCV dump - if (roiTypeSrc == RpptRoiType::LTRB) - convert_roi(roiTensorPtrDst, RpptRoiType::XYWH, dstDescPtr->n); - - // Check if the ROI values for each input is within the bounds of the max buffer allocated - RpptROI roiDefault; - RpptROIPtr roiPtrDefault = &roiDefault; - roiPtrDefault->xywhROI = {0, 0, static_cast(dstDescPtr->w), static_cast(dstDescPtr->h)}; - for (int i = 0; i < dstDescPtr->n; i++) - { - roiTensorPtrDst[i].xywhROI.roiWidth = std::min(roiPtrDefault->xywhROI.roiWidth - roiTensorPtrDst[i].xywhROI.xy.x, roiTensorPtrDst[i].xywhROI.roiWidth); - roiTensorPtrDst[i].xywhROI.roiHeight = 
std::min(roiPtrDefault->xywhROI.roiHeight - roiTensorPtrDst[i].xywhROI.xy.y, roiTensorPtrDst[i].xywhROI.roiHeight); - roiTensorPtrDst[i].xywhROI.xy.x = std::max(roiPtrDefault->xywhROI.xy.x, roiTensorPtrDst[i].xywhROI.xy.x); - roiTensorPtrDst[i].xywhROI.xy.y = std::max(roiPtrDefault->xywhROI.xy.y, roiTensorPtrDst[i].xywhROI.xy.y); - } + // If DEBUG_MODE is set to 1 dump the outputs to csv files for debugging + if(DEBUG_MODE && iterCount == 0) + { + std::ofstream refFile; + refFile.open(func + ".csv"); + for (int i = 0; i < oBufferSize; i++) + refFile << static_cast(*(outputu8 + i)) << ","; + refFile.close(); + } - // Convert any PLN3 outputs to the corresponding PKD3 version for OpenCV dump - if (layoutType == 0 || layoutType == 1) - { - if ((dstDescPtr->c == 3) && (dstDescPtr->layout == RpptLayout::NCHW)) - convert_pln3_to_pkd3(outputu8, dstDescPtr); - } + /*Compare the output of the function with golden outputs only if + 1.QA Flag is set + 2.input bit depth 0 (Input U8 && Output U8) + 3.source and destination layout are the same + 4.augmentation case does not generate random output*/ + if(qaFlag && inputBitDepth == 0 && ((srcDescPtr->layout == dstDescPtr->layout) || pln1OutTypeCase) && !(randomOutputCase)) + compare_output(outputu8, testCaseName, srcDescPtr, dstDescPtr, dstImgSizes, batchSize, interpolationTypeName, noiseTypeName, testCase, dst, scriptPath); + + // Calculate exact dstROI in XYWH format for OpenCV dump + if (roiTypeSrc == RpptRoiType::LTRB) + convert_roi(roiTensorPtrDst, RpptRoiType::XYWH, dstDescPtr->n); + + // Check if the ROI values for each input is within the bounds of the max buffer allocated + RpptROI roiDefault; + RpptROIPtr roiPtrDefault = &roiDefault; + roiPtrDefault->xywhROI = {0, 0, static_cast(dstDescPtr->w), static_cast(dstDescPtr->h)}; + for (int i = 0; i < dstDescPtr->n; i++) + { + roiTensorPtrDst[i].xywhROI.roiWidth = std::min(roiPtrDefault->xywhROI.roiWidth - roiTensorPtrDst[i].xywhROI.xy.x, roiTensorPtrDst[i].xywhROI.roiWidth); + roiTensorPtrDst[i].xywhROI.roiHeight = std::min(roiPtrDefault->xywhROI.roiHeight - roiTensorPtrDst[i].xywhROI.xy.y, roiTensorPtrDst[i].xywhROI.roiHeight); + roiTensorPtrDst[i].xywhROI.xy.x = std::max(roiPtrDefault->xywhROI.xy.x, roiTensorPtrDst[i].xywhROI.xy.x); + roiTensorPtrDst[i].xywhROI.xy.y = std::max(roiPtrDefault->xywhROI.xy.y, roiTensorPtrDst[i].xywhROI.xy.y); + } - // OpenCV dump (if testType is unit test and QA mode is not set) - if(!qaFlag) - write_image_batch_opencv(dst, outputu8, dstDescPtr, imageNamesStart, dstImgSizes, MAX_IMAGE_DUMP); + // Convert any PLN3 outputs to the corresponding PKD3 version for OpenCV dump + if (layoutType == 0 || layoutType == 1) + { + if ((dstDescPtr->c == 3) && (dstDescPtr->layout == RpptLayout::NCHW)) + convert_pln3_to_pkd3(outputu8, dstDescPtr); } + + // OpenCV dump (if testType is unit test and QA mode is not set) + if(!qaFlag) + write_image_batch_opencv(dst, outputu8, dstDescPtr, imageNamesStart, dstImgSizes, MAX_IMAGE_DUMP); } } } diff --git a/utilities/test_suite/HOST/Tensor_host_audio.cpp b/utilities/test_suite/HOST/Tensor_host_audio.cpp index fe6fa1246..c7768b9dd 100644 --- a/utilities/test_suite/HOST/Tensor_host_audio.cpp +++ b/utilities/test_suite/HOST/Tensor_host_audio.cpp @@ -132,13 +132,12 @@ int main(int argc, char **argv) double maxWallTime = 0, minWallTime = 500, avgWallTime = 0; string testCaseName; printf("\nRunning %s %d times (each time with a batch size of %d images) and computing mean statistics...", func.c_str(), numRuns, batchSize); - for (int perfRunCount = 0; 
perfRunCount < numRuns; perfRunCount++) + for (int iterCount = 0; iterCount < noOfIterations; iterCount++) { - for (int iterCount = 0; iterCount < noOfIterations; iterCount++) + // read and decode audio and fill the audio dim values + read_audio_batch_and_fill_dims(srcDescPtr, inputf32, audioFilesPath, iterCount, srcLengthTensor, channelsTensor); + for (int perfRunCount = 0; perfRunCount < numRuns; perfRunCount++) { - // read and decode audio and fill the audio dim values - read_audio_batch_and_fill_dims(srcDescPtr, inputf32, audioFilesPath, iterCount, srcLengthTensor, channelsTensor); - double startWallTime, endWallTime; double wallTime; switch (testCase) @@ -234,28 +233,28 @@ int main(int argc, char **argv) maxWallTime = std::max(maxWallTime, wallTime); minWallTime = std::min(minWallTime, wallTime); avgWallTime += wallTime; + } - // QA mode - verify outputs with golden outputs. Below code doesn’t run for performance tests - if (testType == 0) + // QA mode - verify outputs with golden outputs. Below code doesn’t run for performance tests + if (testType == 0) + { + /* Run only if testCase is not 0 + For testCase 0 verify_non_silent_region_detection function is used for QA testing */ + if (testCase != 0) + verify_output(outputf32, dstDescPtr, dstDims, testCaseName, dst, scriptPath); + + /* Dump the outputs to csv files for debugging + Runs only if + 1. DEBUG_MODE is enabled + 2. Current iteration is 1st iteration + 3. Test case is not 0 */ + if (DEBUG_MODE && iterCount == 0 && testCase != 0) { - /* Run only if testCase is not 0 - For testCase 0 verify_non_silent_region_detection function is used for QA testing */ - if (testCase != 0) - verify_output(outputf32, dstDescPtr, dstDims, testCaseName, dst, scriptPath); - - /* Dump the outputs to csv files for debugging - Runs only if - 1. DEBUG_MODE is enabled - 2. Current iteration is 1st iteration - 3. 
Test case is not 0 */ - if (DEBUG_MODE && iterCount == 0 && testCase != 0) - { - std::ofstream refFile; - refFile.open(func + ".csv"); - for (int i = 0; i < oBufferSize; i++) - refFile << *(outputf32 + i) << "\n"; - refFile.close(); - } + std::ofstream refFile; + refFile.open(func + ".csv"); + for (int i = 0; i < oBufferSize; i++) + refFile << *(outputf32 + i) << "\n"; + refFile.close(); } } } diff --git a/utilities/test_suite/HOST/Tensor_voxel_host.cpp b/utilities/test_suite/HOST/Tensor_voxel_host.cpp index 0198c3ca0..260ccbe3a 100644 --- a/utilities/test_suite/HOST/Tensor_voxel_host.cpp +++ b/utilities/test_suite/HOST/Tensor_voxel_host.cpp @@ -148,8 +148,7 @@ int main(int argc, char * argv[]) // Run case-wise RPP API and measure time int missingFuncFlag = 0; - double startWallTime, endWallTime, wallTime; - double maxWallTime = 0, minWallTime = 5000, avgWallTime = 0; + double maxWallTime = 0, minWallTime = 5000, avgWallTime = 0, wallTime = 0; int noOfIterations = (int)noOfFiles / batchSize; string testCaseName; @@ -163,59 +162,60 @@ int main(int argc, char * argv[]) } printf("\nRunning %s %d times (each time with a batch size of %d images) and computing mean statistics...", funcName.c_str(), numRuns, batchSize); - for (int perfRunCount = 0; perfRunCount < numRuns; perfRunCount++) + for(int iterCount = 0; iterCount < noOfIterations; iterCount++) { - for(int iterCount = 0; iterCount < noOfIterations; iterCount++) - { - vector::const_iterator dataFilePathStart = dataFilePath.begin() + (iterCount * batchSize); - vector::const_iterator dataFilePathEnd = dataFilePathStart + batchSize; - nifti_1_header *niftiHeaderTemp = niftiHeader + batchSize * iterCount; + vector::const_iterator dataFilePathStart = dataFilePath.begin() + (iterCount * batchSize); + vector::const_iterator dataFilePathEnd = dataFilePathStart + batchSize; + nifti_1_header *niftiHeaderTemp = niftiHeader + batchSize * iterCount; - read_nifti_data(dataFilePathStart, dataFilePathEnd, niftiDataArray, niftiHeaderTemp); + read_nifti_data(dataFilePathStart, dataFilePathEnd, niftiDataArray, niftiHeaderTemp); - // optionally pick full image as ROI or a smaller slice of the 3D tensor in X/Y/Z dimensions - for(int i = 0; i < batchSize; i++) - { - // option 1 - test using roi as the whole 3D image - not sliced (example for 240 x 240 x 155 x 1) - roiGenericSrcPtr[i].xyzwhdROI.xyz.x = 0; // start X dim = 0 - roiGenericSrcPtr[i].xyzwhdROI.xyz.y = 0; // start Y dim = 0 - roiGenericSrcPtr[i].xyzwhdROI.xyz.z = 0; // start Z dim = 0 - roiGenericSrcPtr[i].xyzwhdROI.roiWidth = niftiHeaderTemp[i].dim[1]; // length in X dim - roiGenericSrcPtr[i].xyzwhdROI.roiHeight = niftiHeaderTemp[i].dim[2]; // length in Y dim - roiGenericSrcPtr[i].xyzwhdROI.roiDepth = niftiHeaderTemp[i].dim[3]; // length in Z dim - // option 2 - test using roi as a smaller 3D tensor slice - sliced in X, Y and Z dims (example for 240 x 240 x 155 x 1) - // roiGenericSrcPtr[i].xyzwhdROI.xyz.x = niftiHeader.dim[1] / 4; // start X dim = 60 - // roiGenericSrcPtr[i].xyzwhdROI.xyz.y = niftiHeader[i].dim[2] / 4; // start Y dim = 60 - // roiGenericSrcPtr[i].xyzwhdROI.xyz.z = niftiHeader[i].dim[3] / 3; // start Z dim = 51 - // roiGenericSrcPtr[i].xyzwhdROI.roiWidth = niftiHeader[i].dim[1] / 2; // length in X dim = 120 - // roiGenericSrcPtr[i].xyzwhdROI.roiHeight = niftiHeader[i].dim[2] / 2; // length in Y dim = 120 - // roiGenericSrcPtr[i].xyzwhdROI.roiDepth = niftiHeader[i].dim[3] / 3; // length in Z dim = 51 - // option 3 - test using roi as a smaller 3D tensor slice - sliced in only Z dim 
(example for 240 x 240 x 155 x 1) - // roiGenericSrcPtr[i].xyzwhdROI.xyz.x = 0; // start X dim = 0 - // roiGenericSrcPtr[i].xyzwhdROI.xyz.y = 0; // start Y dim = 0 - // roiGenericSrcPtr[i].xyzwhdROI.xyz.z = niftiHeader[i].dim[3] / 3; // start Z dim = 51 - // roiGenericSrcPtr[i].xyzwhdROI.roiWidth = niftiHeader[i].dim[1]; // length in X dim = 240 - // roiGenericSrcPtr[i].xyzwhdROI.roiHeight = niftiHeader[i].dim[2]; // length in Y dim = 240 - // roiGenericSrcPtr[i].xyzwhdROI.roiDepth = niftiHeader[i].dim[3] / 3; // length in Z dim = 51 - // option 4 - test using roi as a smaller 3D tensor slice - sliced in only X and Z dim (example for 240 x 240 x 155 x 1) - // roiGenericSrcPtr[i].xyzwhdROI.xyz.x = niftiHeader[i].dim[1] / 5; // start X dim = 48 - // roiGenericSrcPtr[i].xyzwhdROI.xyz.y = 0; // start Y dim = 0 - // roiGenericSrcPtr[i].xyzwhdROI.xyz.z = niftiHeader[i].dim[3] / 3; // start Z dim = 51 - // roiGenericSrcPtr[i].xyzwhdROI.roiWidth = niftiHeader[i].dim[1] * 3 / 5; // length in X dim = 144 - // roiGenericSrcPtr[i].xyzwhdROI.roiHeight = niftiHeader[i].dim[2]; // length in Y dim = 240 - // roiGenericSrcPtr[i].xyzwhdROI.roiDepth = niftiHeader[i].dim[3] / 3; // length in Z dim = 51 - } + // optionally pick full image as ROI or a smaller slice of the 3D tensor in X/Y/Z dimensions + for(int i = 0; i < batchSize; i++) + { + // option 1 - test using roi as the whole 3D image - not sliced (example for 240 x 240 x 155 x 1) + roiGenericSrcPtr[i].xyzwhdROI.xyz.x = 0; // start X dim = 0 + roiGenericSrcPtr[i].xyzwhdROI.xyz.y = 0; // start Y dim = 0 + roiGenericSrcPtr[i].xyzwhdROI.xyz.z = 0; // start Z dim = 0 + roiGenericSrcPtr[i].xyzwhdROI.roiWidth = niftiHeaderTemp[i].dim[1]; // length in X dim + roiGenericSrcPtr[i].xyzwhdROI.roiHeight = niftiHeaderTemp[i].dim[2]; // length in Y dim + roiGenericSrcPtr[i].xyzwhdROI.roiDepth = niftiHeaderTemp[i].dim[3]; // length in Z dim + // option 2 - test using roi as a smaller 3D tensor slice - sliced in X, Y and Z dims (example for 240 x 240 x 155 x 1) + // roiGenericSrcPtr[i].xyzwhdROI.xyz.x = niftiHeader.dim[1] / 4; // start X dim = 60 + // roiGenericSrcPtr[i].xyzwhdROI.xyz.y = niftiHeader[i].dim[2] / 4; // start Y dim = 60 + // roiGenericSrcPtr[i].xyzwhdROI.xyz.z = niftiHeader[i].dim[3] / 3; // start Z dim = 51 + // roiGenericSrcPtr[i].xyzwhdROI.roiWidth = niftiHeader[i].dim[1] / 2; // length in X dim = 120 + // roiGenericSrcPtr[i].xyzwhdROI.roiHeight = niftiHeader[i].dim[2] / 2; // length in Y dim = 120 + // roiGenericSrcPtr[i].xyzwhdROI.roiDepth = niftiHeader[i].dim[3] / 3; // length in Z dim = 51 + // option 3 - test using roi as a smaller 3D tensor slice - sliced in only Z dim (example for 240 x 240 x 155 x 1) + // roiGenericSrcPtr[i].xyzwhdROI.xyz.x = 0; // start X dim = 0 + // roiGenericSrcPtr[i].xyzwhdROI.xyz.y = 0; // start Y dim = 0 + // roiGenericSrcPtr[i].xyzwhdROI.xyz.z = niftiHeader[i].dim[3] / 3; // start Z dim = 51 + // roiGenericSrcPtr[i].xyzwhdROI.roiWidth = niftiHeader[i].dim[1]; // length in X dim = 240 + // roiGenericSrcPtr[i].xyzwhdROI.roiHeight = niftiHeader[i].dim[2]; // length in Y dim = 240 + // roiGenericSrcPtr[i].xyzwhdROI.roiDepth = niftiHeader[i].dim[3] / 3; // length in Z dim = 51 + // option 4 - test using roi as a smaller 3D tensor slice - sliced in only X and Z dim (example for 240 x 240 x 155 x 1) + // roiGenericSrcPtr[i].xyzwhdROI.xyz.x = niftiHeader[i].dim[1] / 5; // start X dim = 48 + // roiGenericSrcPtr[i].xyzwhdROI.xyz.y = 0; // start Y dim = 0 + // roiGenericSrcPtr[i].xyzwhdROI.xyz.z = niftiHeader[i].dim[3] / 3; // 
start Z dim = 51 + // roiGenericSrcPtr[i].xyzwhdROI.roiWidth = niftiHeader[i].dim[1] * 3 / 5; // length in X dim = 144 + // roiGenericSrcPtr[i].xyzwhdROI.roiHeight = niftiHeader[i].dim[2]; // length in Y dim = 240 + // roiGenericSrcPtr[i].xyzwhdROI.roiDepth = niftiHeader[i].dim[3] / 3; // length in Z dim = 51 + } - // Convert default NIFTI_DATATYPE unstrided buffer to RpptDataType::F32 strided buffer - convert_input_niftitype_to_Rpp32f_generic(niftiDataArray, niftiHeaderTemp, inputF32 , descriptorPtr3D); + // Convert default NIFTI_DATATYPE unstrided buffer to RpptDataType::F32 strided buffer + convert_input_niftitype_to_Rpp32f_generic(niftiDataArray, niftiHeaderTemp, inputF32 , descriptorPtr3D); - // Typecast input from F32 to U8 if input bitdepth requested is U8 - if (inputBitDepth == 0) - { - for(int i = 0; i < iBufferSizeU8; i++) - inputU8[i] = std::min(std::max(static_cast(inputF32[i]), static_cast(0)), static_cast(255)); - } + // Typecast input from F32 to U8 if input bitdepth requested is U8 + if (inputBitDepth == 0) + { + for(int i = 0; i < iBufferSizeU8; i++) + inputU8[i] = std::min(std::max(static_cast(inputF32[i]), static_cast(0)), static_cast(255)); + } + for (int perfRunCount = 0; perfRunCount < numRuns; perfRunCount++) + { + double startWallTime, endWallTime; switch (testCase) { case 0: @@ -311,106 +311,107 @@ int main(int argc, char * argv[]) maxWallTime = std::max(maxWallTime, wallTime); minWallTime = std::min(minWallTime, wallTime); avgWallTime += wallTime; - wallTime *= 1000; + if (missingFuncFlag == 1) { printf("\nThe functionality doesn't yet exist in RPP\n"); return -1; } + } - if(testType == 0) + wallTime *= 1000; + if(testType == 0) + { + cout << "\n\nCPU Backend Wall Time: " << wallTime <<" ms per batch"<< endl; + if(DEBUG_MODE) { - cout << "\n\nCPU Backend Wall Time: " << wallTime <<" ms per nifti file"<< endl; - if(DEBUG_MODE) - { - std::ofstream refFile; - std::string refFileName; - if(layoutType == 0) - refFileName = testCaseName + "_nifti_host_pkd3.csv"; - else if(layoutType == 1) - refFileName = testCaseName + "_nifti_host_pln3.csv"; - else - refFileName = testCaseName + "_nifti_host_pln1.csv"; - refFile.open(refFileName); - for (int i = 0; i < oBufferSize; i++) - refFile << *(outputF32 + i) << ","; - refFile.close(); - } + std::ofstream refFile; + std::string refFileName; + if(layoutType == 0) + refFileName = testCaseName + "_nifti_host_pkd3.csv"; + else if(layoutType == 1) + refFileName = testCaseName + "_nifti_host_pln3.csv"; + else + refFileName = testCaseName + "_nifti_host_pln1.csv"; + refFile.open(refFileName); + for (int i = 0; i < oBufferSize; i++) + refFile << *(outputF32 + i) << ","; + refFile.close(); + } - if(inputBitDepth == 0) - { - Rpp64u bufferLength = iBufferSize * sizeof(Rpp8u) + descriptorPtr3D->offsetInBytes; + if(inputBitDepth == 0) + { + Rpp64u bufferLength = iBufferSize * sizeof(Rpp8u) + descriptorPtr3D->offsetInBytes; - // Copy U8 buffer to F32 buffer for display purposes - for(int i = 0; i < bufferLength; i++) - outputF32[i] = static_cast(outputU8[i]); - } + // Copy U8 buffer to F32 buffer for display purposes + for(int i = 0; i < bufferLength; i++) + outputF32[i] = static_cast(outputU8[i]); + } - /*Compare the output of the function with golden outputs only if - 1.QA Flag is set - 2.input bit depth 2 (F32)*/ - if(qaFlag && inputBitDepth == 2) - compare_output(outputF32, oBufferSize, testCaseName, layoutType, descriptorPtr3D, (RpptRoiXyzwhd *)roiGenericSrcPtr, dstPath, scriptPath); - else + /*Compare the output of the function with 
golden outputs only if + 1.QA Flag is set + 2.input bit depth 2 (F32)*/ + if(qaFlag && inputBitDepth == 2) + compare_output(outputF32, oBufferSize, testCaseName, layoutType, descriptorPtr3D, (RpptRoiXyzwhd *)roiGenericSrcPtr, dstPath, scriptPath); + else + { + for(int batchCount = 0; batchCount < batchSize; batchCount++) { - for(int batchCount = 0; batchCount < batchSize; batchCount++) + int index = iterCount * batchSize + batchCount; + Rpp32f *outputTemp = outputF32 + batchCount * descriptorPtr3D->strides[0]; + for(int i = 0; i < numChannels; i++) // temporary changes to process pln3 { - int index = iterCount * batchSize + batchCount; - Rpp32f *outputTemp = outputF32 + batchCount * descriptorPtr3D->strides[0]; - for(int i = 0; i < numChannels; i++) // temporary changes to process pln3 + int xyFrameSize = niftiHeaderTemp[batchCount].dim[1] * niftiHeaderTemp[batchCount].dim[2]; + int xyFrameSizeROI = roiGenericSrcPtr[batchCount].xyzwhdROI.roiWidth * roiGenericSrcPtr[batchCount].xyzwhdROI.roiHeight; + + uint dataSize = niftiHeaderTemp[batchCount].dim[1] * niftiHeaderTemp[batchCount].dim[2] * niftiHeaderTemp[batchCount].dim[3]; + uchar *niftiDataU8 = (uchar *) malloc(dataSize * sizeof(uchar)); + uchar *outputBufferOpenCV = (uchar *)calloc(xyFrameSizeROI, sizeof(uchar)); + + // Convert RpptDataType::F32 strided buffer to default NIFTI_DATATYPE unstrided buffer + Rpp64u increment; + if (descriptorPtr3D->layout == RpptLayout::NCDHW) + increment = (Rpp64u)descriptorPtr3D->strides[1]; + else + increment = 1; + convert_output_Rpp32f_to_niftitype_generic(outputTemp + i * increment, descriptorPtr3D, niftiDataArray[batchCount], &niftiHeaderTemp[batchCount]); + NIFTI_DATATYPE min = niftiDataArray[batchCount][0]; + NIFTI_DATATYPE max = niftiDataArray[batchCount][0]; + for (int i = 0; i < dataSize; i++) + { + min = std::min(min, niftiDataArray[batchCount][i]); + max = std::max(max, niftiDataArray[batchCount][i]); + } + Rpp32f multiplier = 255.0f / (max - min); + for (int i = 0; i < dataSize; i++) + niftiDataU8[i] = (uchar)((niftiDataArray[batchCount][i] - min) * multiplier); + + uchar *niftiDataU8Temp = niftiDataU8; + for (int zPlane = roiGenericSrcPtr[batchCount].xyzwhdROI.xyz.z; zPlane < roiGenericSrcPtr[batchCount].xyzwhdROI.xyz.z + roiGenericSrcPtr[batchCount].xyzwhdROI.roiDepth; zPlane++) + { + write_image_from_nifti_opencv(niftiDataU8Temp, niftiHeaderTemp[batchCount].dim[1], (RpptRoiXyzwhd *)roiGenericSrcPtr, outputBufferOpenCV, zPlane, i, batchCount, dstPath, testCaseName, index); + niftiDataU8Temp += xyFrameSize; + } + + write_nifti_file(&niftiHeaderTemp[batchCount], niftiDataArray[batchCount], index, i, dstPath, testCaseName); + + if(i == 0) + { + std::string command = "convert -delay 10 -loop 0 " + std::string(dstPath) + "/" + testCaseName + "_nifti_" + std::to_string(index) + "_zPlane_chn_0_*.jpg " + std::string(dstPath) + "/" + testCaseName + "_niftiOutput_" + std::to_string(index) + "_chn_" + std::to_string(i) + ".gif"; + system(command.c_str()); + } + if(i == 1) + { + std::string command = "convert -delay 10 -loop 0 " + std::string(dstPath) + "/" + testCaseName + "_nifti_" + std::to_string(index) + "_zPlane_chn_1_*.jpg " + std::string(dstPath) + "/" + testCaseName + "_niftiOutput_" + std::to_string(index) + "_chn_" + std::to_string(i) + ".gif"; + system(command.c_str()); + } + if(i == 2) { - int xyFrameSize = niftiHeaderTemp[batchCount].dim[1] * niftiHeaderTemp[batchCount].dim[2]; - int xyFrameSizeROI = roiGenericSrcPtr[batchCount].xyzwhdROI.roiWidth * 
roiGenericSrcPtr[batchCount].xyzwhdROI.roiHeight; - - uint dataSize = niftiHeaderTemp[batchCount].dim[1] * niftiHeaderTemp[batchCount].dim[2] * niftiHeaderTemp[batchCount].dim[3]; - uchar *niftiDataU8 = (uchar *) malloc(dataSize * sizeof(uchar)); - uchar *outputBufferOpenCV = (uchar *)calloc(xyFrameSizeROI, sizeof(uchar)); - - // Convert RpptDataType::F32 strided buffer to default NIFTI_DATATYPE unstrided buffer - Rpp64u increment; - if (descriptorPtr3D->layout == RpptLayout::NCDHW) - increment = (Rpp64u)descriptorPtr3D->strides[1]; - else - increment = 1; - convert_output_Rpp32f_to_niftitype_generic(outputTemp + i * increment, descriptorPtr3D, niftiDataArray[batchCount], &niftiHeaderTemp[batchCount]); - NIFTI_DATATYPE min = niftiDataArray[batchCount][0]; - NIFTI_DATATYPE max = niftiDataArray[batchCount][0]; - for (int i = 0; i < dataSize; i++) - { - min = std::min(min, niftiDataArray[batchCount][i]); - max = std::max(max, niftiDataArray[batchCount][i]); - } - Rpp32f multiplier = 255.0f / (max - min); - for (int i = 0; i < dataSize; i++) - niftiDataU8[i] = (uchar)((niftiDataArray[batchCount][i] - min) * multiplier); - - uchar *niftiDataU8Temp = niftiDataU8; - for (int zPlane = roiGenericSrcPtr[batchCount].xyzwhdROI.xyz.z; zPlane < roiGenericSrcPtr[batchCount].xyzwhdROI.xyz.z + roiGenericSrcPtr[batchCount].xyzwhdROI.roiDepth; zPlane++) - { - write_image_from_nifti_opencv(niftiDataU8Temp, niftiHeaderTemp[batchCount].dim[1], (RpptRoiXyzwhd *)roiGenericSrcPtr, outputBufferOpenCV, zPlane, i, batchCount, dstPath, testCaseName, index); - niftiDataU8Temp += xyFrameSize; - } - - write_nifti_file(&niftiHeaderTemp[batchCount], niftiDataArray[batchCount], index, i, dstPath, testCaseName); - - if(i == 0) - { - std::string command = "convert -delay 10 -loop 0 " + std::string(dstPath) + "/" + testCaseName + "_nifti_" + std::to_string(index) + "_zPlane_chn_0_*.jpg " + std::string(dstPath) + "/" + testCaseName + "_niftiOutput_" + std::to_string(index) + "_chn_" + std::to_string(i) + ".gif"; - system(command.c_str()); - } - if(i == 1) - { - std::string command = "convert -delay 10 -loop 0 " + std::string(dstPath) + "/" + testCaseName + "_nifti_" + std::to_string(index) + "_zPlane_chn_1_*.jpg " + std::string(dstPath) + "/" + testCaseName + "_niftiOutput_" + std::to_string(index) + "_chn_" + std::to_string(i) + ".gif"; - system(command.c_str()); - } - if(i == 2) - { - std::string command = "convert -delay 10 -loop 0 " + std::string(dstPath) + "/" + testCaseName + "_nifti_" + std::to_string(index) + "_zPlane_chn_2_*.jpg " + std::string(dstPath) + "/" + testCaseName + "_niftiOutput_" + std::to_string(index) + "_chn_" + std::to_string(i) + ".gif"; - system(command.c_str()); - } - free(niftiDataU8); - free(outputBufferOpenCV); + std::string command = "convert -delay 10 -loop 0 " + std::string(dstPath) + "/" + testCaseName + "_nifti_" + std::to_string(index) + "_zPlane_chn_2_*.jpg " + std::string(dstPath) + "/" + testCaseName + "_niftiOutput_" + std::to_string(index) + "_chn_" + std::to_string(i) + ".gif"; + system(command.c_str()); } + free(niftiDataU8); + free(outputBufferOpenCV); } } } diff --git a/utilities/test_suite/HOST/runAudioTests.py b/utilities/test_suite/HOST/runAudioTests.py index 70ec00026..db54cf53e 100644 --- a/utilities/test_suite/HOST/runAudioTests.py +++ b/utilities/test_suite/HOST/runAudioTests.py @@ -23,11 +23,10 @@ """ import os -import subprocess # nosec -import argparse import sys -import datetime -import shutil +sys.dont_write_bytecode = True +sys.path.append(os.path.join(os.path.dirname( 
__file__ ), '..' )) +from common import * # Set the timestamp timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") @@ -39,56 +38,6 @@ caseMin = 0 caseMax = 3 -# Checks if the folder path is empty, or is it a root folder, or if it exists, and remove its contents -def validate_and_remove_files(path): - if not path: # check if a string is empty - print("Folder path is empty.") - exit(0) - - elif path == "/*": # check if the root directory is passed to the function - print("Root folder cannot be deleted.") - exit(0) - - elif os.path.exists(path): # check if the folder exists - # Get a list of files and directories within the specified path - items = os.listdir(path) - - if items: - # The directory is not empty, delete its contents - for item in items: - item_path = os.path.join(path, item) - if os.path.isfile(item_path): - os.remove(item_path) - elif os.path.isdir(item_path): - shutil.rmtree(item_path) # Delete the directory if it exists - - else: - print("Path is invalid or does not exist.") - exit(0) - -# Check if the folder is the root folder or exists, and remove the specified subfolders -def validate_and_remove_folders(path, folder): - if path == "/*": # check if the root directory is passed to the function - print("Root folder cannot be deleted.") - exit(0) - if path and os.path.isdir(path): # checks if directory string is not empty and it exists - output_folders = [folder_name for folder_name in os.listdir(path) if folder_name.startswith(folder)] - - # Loop through each directory and delete it only if it exists - for folder_name in output_folders: - folder_path = os.path.join(path, folder_name) - if os.path.isdir(folder_path): - shutil.rmtree(folder_path) # Delete the directory if it exists - print("Deleted directory:", folder_path) - else: - print("Directory not found:", folder_path) - -# Validate if a path exists and is a directory -def validate_path(input_path): - if not os.path.exists(input_path): - raise ValueError("path " + input_path +" does not exist.") - if not os.path.isdir(input_path): - raise ValueError("path " + input_path + " is not a directory.") # Get a list of log files based on a flag for preserving output def get_log_file_list(): @@ -229,6 +178,9 @@ def rpp_test_suite_parser_and_validator(): subprocess.run(["cmake", scriptPath], cwd=".") # nosec subprocess.run(["make", "-j16"], cwd=".") # nosec +# List of cases supported +supportedCaseList = ['0', '1', '2', '3'] + if testType == 0: if batchSize != 3: print("QA tests can only run with a batch size of 3.") @@ -240,8 +192,7 @@ def rpp_test_suite_parser_and_validator(): srcPath = scriptPath + "/../TEST_AUDIO_FILES/three_sample_multi_channel_src1" else: srcPath = inFilePath - if int(case) < 0 or int(case) > 3: - print(f"Invalid case number {case}. Case number must be 0-3 range!") + if case not in supportedCaseList: continue run_unit_test(srcPath, case, numRuns, testType, batchSize, outFilePath) @@ -252,88 +203,24 @@ def rpp_test_suite_parser_and_validator(): srcPath = scriptPath + "/../TEST_AUDIO_FILES/three_sample_multi_channel_src1" else: srcPath = inFilePath - if int(case) < 0 or int(case) > 3: - print(f"Invalid case number {case}. 
Case number must be 0-3 range!") + if case not in supportedCaseList: continue run_performance_test(loggingFolder, srcPath, case, numRuns, testType, batchSize, outFilePath) # print the results of qa tests -supportedCaseList = ['0', '1', '2', '3'] nonQACaseList = [] # Add cases present in supportedCaseList, but without QA support if testType == 0: qaFilePath = os.path.join(outFilePath, "QA_results.txt") checkFile = os.path.isfile(qaFilePath) if checkFile: - f = open(qaFilePath, 'r+') print("---------------------------------- Results of QA Test - Tensor_host_audio -----------------------------------\n") - numLines = 0 - numPassed = 0 - for line in f: - sys.stdout.write(line) - numLines += 1 - if "PASSED" in line: - numPassed += 1 - sys.stdout.flush() - resultsInfo = "\n\nFinal Results of Tests:" - resultsInfo += "\n - Total test cases including all subvariants REQUESTED = " + str(numLines) - resultsInfo += "\n - Total test cases including all subvariants PASSED = " + str(numPassed) - resultsInfo += "\n\nGeneral information on Tensor test suite availability:" - resultsInfo += "\n - Total augmentations supported in Tensor audio test suite = " + str(len(supportedCaseList)) - resultsInfo += "\n - Total augmentations with golden output QA test support = " + str(len(supportedCaseList) - len(nonQACaseList)) - resultsInfo += "\n - Total augmentations without golden ouput QA test support (due to randomization involved) = " + str(len(nonQACaseList)) - f.write(resultsInfo) - print("\n-------------------------------------------------------------------" + resultsInfo + "\n\n-------------------------------------------------------------------") + print_qa_tests_summary(qaFilePath, supportedCaseList, nonQACaseList) # Performance tests if (testType == 1): log_file_list = get_log_file_list() + for log_file in log_file_list: + print_performance_tests_summary(log_file, "", numRuns) - try: - f = open(log_file_list[0], "r") - print("\n\n\nOpened log file -> "+ log_file_list[0]) - except IOError: - print("Skipping file -> "+ log_file_list[0]) - exit(0) - - # Initialize data structures to store the parsed data - functions = [] - max_wall_times = [] - min_wall_times = [] - avg_wall_times = [] - prev_line = "" - funcCount = 0 - - for line in f: - if "max,min,avg wall times in ms/batch" in line: - split_word_start = "Running " - split_word_end = " " + str(numRuns) - prev_line = prev_line.partition(split_word_start)[2].partition(split_word_end)[0] - if prev_line not in functions: - functions.append(prev_line) - split_word_start = "max,min,avg wall times in ms/batch = " - split_word_end = "\n" - stats = line.partition(split_word_start)[2].partition(split_word_end)[0].split(",") - max_wall_times.append(float(stats[0])) - min_wall_times.append(float(stats[1])) - avg_wall_times.append(float(stats[2])) - funcCount += 1 - - if line != "\n": - prev_line = line - - # Print log lengths - print("Functionalities - "+ str(funcCount)) - - # Print the summary in a well-formatted table - print("\n\nFunctionality\t\t\t\t\t\tnumRuns\t\tmax(ms/batch)\t\tmin(ms/batch)\t\tavg(ms/batch)\n") - - if len(functions) > 0: - max_func_length = max(len(func) for func in functions) - - for i, func in enumerate(functions): - print("{func}\t\t\t\t{numRuns}\t{:<15.6f}\t{:<15.6f}\t{:<15.6f}".format( - max_wall_times[i], min_wall_times[i], avg_wall_times[i], func=func, numRuns=numRuns)) - else: - print("No functionality data found in the log file.") \ No newline at end of file diff --git a/utilities/test_suite/HOST/runTests.py 
b/utilities/test_suite/HOST/runTests.py index b08c4d5e8..9aa92d9c4 100644 --- a/utilities/test_suite/HOST/runTests.py +++ b/utilities/test_suite/HOST/runTests.py @@ -23,12 +23,10 @@ """ import os -import subprocess # nosec -import argparse import sys -import datetime -import shutil -import pandas as pd +sys.dont_write_bytecode = True +sys.path.append(os.path.join(os.path.dirname( __file__ ), '..' )) +from common import * # Set the timestamp timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") @@ -41,69 +39,8 @@ perfQaInputFile = scriptPath + "/../TEST_IMAGES/eight_images_mixed_src1" outFolderPath = os.getcwd() buildFolderPath = os.getcwd() - -# Checks if the folder path is empty, or is it a root folder, or if it exists, and remove its contents -def validate_and_remove_files(path): - if not path: # check if a string is empty - print("Folder path is empty.") - exit() - - elif path == "/*": # check if the root directory is passed to the function - print("Root folder cannot be deleted.") - exit() - - elif os.path.exists(path): # check if the folder exists - # Get a list of files and directories within the specified path - items = os.listdir(path) - - if items: - # The directory is not empty, delete its contents - for item in items: - item_path = os.path.join(path, item) - if os.path.isfile(item_path): - os.remove(item_path) - elif os.path.isdir(item_path): - shutil.rmtree(item_path) # Delete the directory if it exists - - else: - print("Path is invalid or does not exist.") - exit() - -# Check if the folder is the root folder or exists, and remove the specified subfolders -def validate_and_remove_folders(path, folder): - if path == "/*": # check if the root directory is passed to the function - print("Root folder cannot be deleted.") - exit() - if path and os.path.isdir(path): # checks if directory string is not empty and it exists - output_folders = [folder_name for folder_name in os.listdir(path) if folder_name.startswith(folder)] - - # Loop through each directory and delete it only if it exists - for folder_name in output_folders: - folder_path = os.path.join(path, folder_name) - if os.path.isdir(folder_path): - shutil.rmtree(folder_path) # Delete the directory if it exists - print("Deleted directory:", folder_path) - else: - print("Directory not found:", folder_path) - -# Create layout directories within a destination path based on a layout dictionary -def create_layout_directories(dst_path, layout_dict): - for layout in range(3): - current_layout = layout_dict[layout] - try: - os.makedirs(dst_path + '/' + current_layout) - except FileExistsError: - pass - folder_list = [f for f in os.listdir(dst_path) if current_layout.lower() in f] - for folder in folder_list: - os.rename(dst_path + '/' + folder, dst_path + '/' + current_layout + '/' + folder) - -# Validate if a path exists and is a directory -def validate_path(input_path): - if not os.path.exists(input_path): - raise ValueError("path " + input_path +" does not exist.") - if not os.path.isdir(input_path): - raise ValueError("path " + input_path + " is not a directory.") +caseMin = 0 +caseMax = 89 # Get a list of log files based on a flag for preserving output def get_log_file_list(preserveOutput): @@ -130,43 +67,20 @@ def func_group_finder(case_number): else: return "miscellaneous" -# Generate a directory name based on certain parameters -def directory_name_generator(qaMode, affinity, layoutType, case, path): - if qaMode == 0: - functionality_group = func_group_finder(int(case)) - dst_folder_temp = 
"{}/rpp_{}_{}_{}".format(path, affinity, layoutType, functionality_group) - else: - dst_folder_temp = path - - return dst_folder_temp - -# Process the layout based on the given parameters and generate the directory name and log file layout. -def process_layout(layout, qaMode, case, dstPath): - if layout == 0: - dstPathTemp = directory_name_generator(qaMode, "host", "pkd3", case, dstPath) - log_file_layout = "pkd3" - elif layout == 1: - dstPathTemp = directory_name_generator(qaMode, "host", "pln3", case, dstPath) - log_file_layout = "pln3" - elif layout == 2: - dstPathTemp = directory_name_generator(qaMode, "host", "pln1", case, dstPath) - log_file_layout = "pln1" - - return dstPathTemp, log_file_layout - def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layout, qaMode, decoderType, batchSize, roiList): print("\n\n\n\n") print("--------------------------------") print("Running a New Functionality...") print("--------------------------------") + bitDepths = range(7) + outputFormatToggles = [0, 1] if qaMode: - maxBitdepth = 1 - else: - maxBitdepth = 7 - for bitDepth in range(maxBitdepth): + bitDepths = [0] + outputFormatToggles = [0] + for bitDepth in bitDepths: print("\n\n\nRunning New Bit Depth...\n-------------------------\n\n") - for outputFormatToggle in range(2): + for outputFormatToggle in outputFormatToggles: # There is no layout toggle for PLN1 case, so skip this case if layout == 2 and outputFormatToggle == 1: continue @@ -216,11 +130,10 @@ def run_performance_test(loggingFolder, log_file_layout, srcPath1, srcPath2, dst print("--------------------------------") print("Running a New Functionality...") print("--------------------------------") + bitDepths = range(7) if qaMode: - maxBitdepth = 1 - else: - maxBitdepth = 7 - for bitDepth in range(maxBitdepth): + bitDepths = [0] + for bitDepth in bitDepths: print("\n\n\nRunning New Bit Depth...\n-------------------------\n\n") for outputFormatToggle in range(2): @@ -244,8 +157,8 @@ def rpp_test_suite_parser_and_validator(): parser = argparse.ArgumentParser() parser.add_argument("--input_path1", type = str, default = inFilePath1, help = "Path to the input folder 1") parser.add_argument("--input_path2", type = str, default = inFilePath2, help = "Path to the input folder 2") - parser.add_argument("--case_start", type = int, default = 0, help = "Testing range starting case # - (0:89)") - parser.add_argument("--case_end", type = int, default = 89, help = "Testing range ending case # - (0:89)") + parser.add_argument("--case_start", type = int, default = caseMin, help = "Testing start case # - Range must be in [" + str(caseMin) + ":" + str(caseMax) + "]") + parser.add_argument("--case_end", type = int, default = caseMax, help = "Testing end case # - Range must be in [" + str(caseMin) + ":" + str(caseMax) + "]") parser.add_argument('--test_type', type = int, default = 0, help = "Type of Test - (0 = Unit tests / 1 = Performance tests)") parser.add_argument('--case_list', nargs = "+", help = "List of case numbers to list", required = False) parser.add_argument('--qa_mode', type = int, default = 0, help = "Run with qa_mode? Output images from tests will be compared with golden outputs - (0 / 1)", required = False) @@ -263,8 +176,8 @@ def rpp_test_suite_parser_and_validator(): validate_path(perfQaInputFile) # validate the parameters passed by user - if ((args.case_start < 0 or args.case_start > 89) or (args.case_end < 0 or args.case_end > 89)): - print("Starting case# and Ending case# must be in the 0:89 range. 
Aborting!") + if ((args.case_start < caseMin or args.case_start > caseMax) or (args.case_end < caseMin or args.case_end > caseMax)): + print(f"Starting case# and Ending case# must be in the {caseMin}:{caseMax} range. Aborting!") exit(0) elif args.case_end < args.case_start: print("Ending case# must be greater than starting case#. Aborting!") @@ -278,7 +191,7 @@ def rpp_test_suite_parser_and_validator(): elif args.decoder_type < 0 or args.decoder_type > 1: print("Decoder Type must be in the 0/1 (0 = OpenCV / 1 = TurboJPEG). Aborting") exit(0) - elif args.case_list is not None and args.case_start > 0 and args.case_end < 89: + elif args.case_list is not None and args.case_start > caseMin and args.case_end < caseMax: print("Invalid input! Please provide only 1 option between case_list, case_start and case_end") exit(0) elif args.num_runs <= 0: @@ -302,9 +215,9 @@ def rpp_test_suite_parser_and_validator(): args.case_list = [str(x) for x in args.case_list] else: for case in args.case_list: - if int(case) < 0 or int(case) > 89: - print("The case# must be in the 0:89 range!") - exit(0) + if int(case) < caseMin or int(case) > caseMax: + print(f"Invalid case number {case}! Case number must be in the {caseMin}:{caseMax} range. Aborting!") + exit(0) return args @@ -339,7 +252,7 @@ def rpp_test_suite_parser_and_validator(): numRuns = 1 elif(testType == 1): if "--num_runs" not in sys.argv: - numRuns = 1000 #default numRuns for running performance tests + numRuns = 100 #default numRuns for running performance tests outFilePath = outFolderPath + "/OUTPUT_PERFORMANCE_LOGS_HOST_" + timestamp else: print("Invalid TEST_TYPE specified. TEST_TYPE should be 0/1 (0 = Unittests / 1 = Performancetests)") @@ -367,6 +280,9 @@ def rpp_test_suite_parser_and_validator(): subprocess.run(["cmake", scriptPath], cwd=".") # nosec subprocess.run(["make", "-j16"], cwd=".") # nosec +# List of cases supported +supportedCaseList = ['0', '1', '2', '4', '8', '13', '20', '21', '23', '29', '30', '31', '34', '36', '37', '38', '39', '45', '54', '61', '63', '70', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89'] + print("\n\n\n\n\n") print("##########################################################################################") print("Running all layout Inputs...") @@ -374,6 +290,8 @@ def rpp_test_suite_parser_and_validator(): if testType == 0: for case in caseList: + if case not in supportedCaseList: + continue if case == "82" and (("--input_path1" not in sys.argv and "--input_path2" not in sys.argv) or qaMode == 1): srcPath1 = ricapInFilePath srcPath2 = ricapInFilePath @@ -381,11 +299,8 @@ def rpp_test_suite_parser_and_validator(): if qaMode == 1 and case != "82": srcPath1 = inFilePath1 srcPath2 = inFilePath2 - if int(case) < 0 or int(case) > 89: - print(f"Invalid case number {case}. Case number must be in the range of 0 to 89!") - continue for layout in range(3): - dstPathTemp, log_file_layout = process_layout(layout, qaMode, case, dstPath) + dstPathTemp, log_file_layout = process_layout(layout, qaMode, case, dstPath, "host", func_group_finder) if qaMode == 0: if not os.path.isdir(dstPathTemp): @@ -397,8 +312,7 @@ def rpp_test_suite_parser_and_validator(): create_layout_directories(dstPath, layoutDict) else: for case in caseList: - if int(case) < 0 or int(case) > 89: - print(f"Invalid case number {case}. 
Case number must be in the range of 0 to 89!") + if case not in supportedCaseList: continue # if QA mode is enabled overwrite the input folders with the folders used for generating golden outputs if qaMode == 1 and case != "82": @@ -408,36 +322,18 @@ def rpp_test_suite_parser_and_validator(): srcPath1 = ricapInFilePath srcPath2 = ricapInFilePath for layout in range(3): - dstPathTemp, log_file_layout = process_layout(layout, qaMode, case, dstPath) + dstPathTemp, log_file_layout = process_layout(layout, qaMode, case, dstPath, "host", func_group_finder) run_performance_test(loggingFolder, log_file_layout, srcPath1, srcPath2, dstPath, case, numRuns, testType, layout, qaMode, decoderType, batchSize, roiList) # print the results of qa tests -supportedCaseList = ['0', '1', '2', '4', '8', '13', '20', '21', '23', '29', '30', '31', '34', '36', '37', '38', '39', '45', '54', '61', '63', '70', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89'] nonQACaseList = ['8', '24', '54', '84'] # Add cases present in supportedCaseList, but without QA support if qaMode and testType == 0: qaFilePath = os.path.join(outFilePath, "QA_results.txt") checkFile = os.path.isfile(qaFilePath) if checkFile: - f = open(qaFilePath, 'r+') print("---------------------------------- Results of QA Test - Tensor_host ----------------------------------\n") - numLines = 0 - numPassed = 0 - for line in f: - sys.stdout.write(line) - numLines += 1 - if "PASSED" in line: - numPassed += 1 - sys.stdout.flush() - resultsInfo = "\n\nFinal Results of Tests:" - resultsInfo += "\n - Total test cases including all subvariants REQUESTED = " + str(numLines) - resultsInfo += "\n - Total test cases including all subvariants PASSED = " + str(numPassed) - resultsInfo += "\n\nGeneral information on Tensor test suite availability:" - resultsInfo += "\n - Total augmentations supported in Tensor test suite = " + str(len(supportedCaseList)) - resultsInfo += "\n - Total augmentations with golden output QA test support = " + str(len(supportedCaseList) - len(nonQACaseList)) - resultsInfo += "\n - Total augmentations without golden ouput QA test support (due to randomization involved) = " + str(len(nonQACaseList)) - f.write(resultsInfo) - print("\n-------------------------------------------------------------------" + resultsInfo + "\n\n-------------------------------------------------------------------") + print_qa_tests_summary(qaFilePath, supportedCaseList, nonQACaseList) layoutDict = {0:"PKD3", 1:"PLN3", 2:"PLN1"} # unit tests and QA mode disabled @@ -596,63 +492,4 @@ def rpp_test_suite_parser_and_validator(): ] for log_file in log_file_list: - # Opening log file - try: - f = open(log_file,"r") - print("\n\n\nOpened log file -> "+ log_file) - except IOError: - print("Skipping file -> "+ log_file) - continue - - stats = [] - maxVals = [] - minVals = [] - avgVals = [] - functions = [] - frames = [] - prevLine = "" - funcCount = 0 - - # Loop over each line - for line in f: - for functionality_group in functionality_group_list: - if functionality_group in line: - functions.extend([" ", functionality_group, " "]) - frames.extend([" ", " ", " "]) - maxVals.extend([" ", " ", " "]) - minVals.extend([" ", " ", " "]) - avgVals.extend([" ", " ", " "]) - - if "max,min,avg wall times in ms/batch" in line: - split_word_start = "Running " - split_word_end = " " +str(numRuns) - prevLine = prevLine.partition(split_word_start)[2].partition(split_word_end)[0] - if prevLine not in functions: - functions.append(prevLine) - frames.append(numRuns) - split_word_start = 
"max,min,avg wall times in ms/batch = " - split_word_end = "\n" - stats = line.partition(split_word_start)[2].partition(split_word_end)[0].split(",") - maxVals.append(stats[0]) - minVals.append(stats[1]) - avgVals.append(stats[2]) - funcCount += 1 - - if line != "\n": - prevLine = line - - # Print log lengths - print("Functionalities - "+ str(funcCount)) - - # Print summary of log - print("\n\nFunctionality\t\t\t\t\t\tFrames Count\tmax(ms/batch)\t\tmin(ms/batch)\t\tavg(ms/batch)\n") - if len(functions) != 0: - maxCharLength = len(max(functions, key = len)) - functions = [x + (' ' * (maxCharLength - len(x))) for x in functions] - for i, func in enumerate(functions): - print(func + "\t" + str(frames[i]) + "\t\t" + str(maxVals[i]) + "\t" + str(minVals[i]) + "\t" + str(avgVals[i])) - else: - print("No variants under this category") - - # Closing log file - f.close() + print_performance_tests_summary(log_file, functionality_group_list, numRuns) diff --git a/utilities/test_suite/HOST/runTests_voxel.py b/utilities/test_suite/HOST/runTests_voxel.py index d94dcd0f9..998d9d33e 100644 --- a/utilities/test_suite/HOST/runTests_voxel.py +++ b/utilities/test_suite/HOST/runTests_voxel.py @@ -23,11 +23,10 @@ """ import os -import subprocess # nosec -import argparse import sys -import datetime -import shutil +sys.dont_write_bytecode = True +sys.path.append(os.path.join(os.path.dirname( __file__ ), '..' )) +from common import * # Set the timestamp timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") @@ -41,66 +40,6 @@ caseMin = 0 caseMax = 5 -# Check if folder path is empty, if it is the root folder, or if it exists, and remove its contents -def validate_and_remove_contents(path): - if not path: # check if a string is empty - print("Folder path is empty.") - exit() - if path == "/*": # check if the root directory is passed to the function - print("Root folder cannot be deleted.") - exit() - if os.path.exists(path): # check if the folder exists - # Get a list of files and directories within the specified path - items = os.listdir(path) - - if items: - # The directory is not empty, delete its contents - for item in items: - item_path = os.path.join(path, item) - if os.path.isfile(item_path): - os.remove(item_path) - elif os.path.isdir(item_path): - shutil.rmtree(item_path) # Delete the directory if it exists - else: - print("Path is invalid or does not exist.") - exit() - -# Check if the folder is the root folder or exists, and remove the specified subfolders -def validate_and_remove_folders(path, folder): - if path == "/*": # check if the root directory is passed to the function - print("Root folder cannot be deleted.") - exit() - if path and os.path.isdir(path): # checks if directory string is not empty and it exists - output_folders = [folder_name for folder_name in os.listdir(path) if folder_name.startswith(folder)] - - # Loop through each directory and delete it only if it exists - for folder_name in output_folders: - folder_path = os.path.join(path, folder_name) - if os.path.isdir(folder_path): - shutil.rmtree(folder_path) # Delete the directory if it exists - print("Deleted directory:", folder_path) - else: - print("Directory not found:", folder_path) - -# Create layout directories within a destination path based on a layout dictionary -def create_layout_directories(dst_path, layout_dict): - for layout in range(3): - current_layout = layout_dict[layout] - try: - os.makedirs(dst_path + '/' + current_layout) - except FileExistsError: - pass - folder_list = [f for f in os.listdir(dst_path) if 
current_layout.lower() in f] - for folder in folder_list: - os.rename(dst_path + '/' + folder, dst_path + '/' + current_layout + '/' + folder) - -# Validate if a path exists and is a directory -def validate_path(input_path): - if not os.path.exists(input_path): - raise ValueError("path " + input_path +" does not exist.") - if not os.path.isdir(input_path): - raise ValueError("path " + input_path + " is not a directory.") - # Get a list of log files based on a flag for preserving output def get_log_file_list(): return [ @@ -118,36 +57,14 @@ def func_group_finder(case_number): else: return "miscellaneous" -# Generate a directory name based on certain parameters -def directory_name_generator(qaMode, affinity, layoutType, case, path): - if qaMode == 0: - functionality_group = func_group_finder(int(case)) - dst_folder_temp = "{}/rpp_{}_{}_{}".format(path, affinity, layoutType, functionality_group) - else: - dst_folder_temp = path - - return dst_folder_temp - -# Process the layout based on the given parameters and generate the directory name and log file layout. -def process_layout(layout, qaMode, case, dstPath): - if layout == 0: - dstPathTemp = directory_name_generator(qaMode, "host", "pkd3", case, dstPath) - logFileLayout = "pkd3" - elif layout == 1: - dstPathTemp = directory_name_generator(qaMode, "host", "pln3", case, dstPath) - logFileLayout = "pln3" - elif layout == 2: - dstPathTemp = directory_name_generator(qaMode, "host", "pln1", case, dstPath) - logFileLayout = "pln1" - - return dstPathTemp, logFileLayout - def run_unit_test(headerPath, dataPath, dstPathTemp, layout, case, numRuns, testType, qaMode, batchSize): print("\n\n\n\n") print("--------------------------------") print("Running a New Functionality...") print("--------------------------------") bitDepths = [0, 2] + if qaMode: + bitDepths = [2] for bitDepth in bitDepths: print("\n\n\nRunning New Bit Depth...\n-------------------------\n\n") print("\n\n\n\n") @@ -177,8 +94,7 @@ def run_performance_test(loggingFolder, logFileLayout, headerPath, dataPath, dst log_file.write(cleaned_output + '\n') if "max,min,avg wall times" in output: log_file.write("\n") - - print("------------------------------------------------------------------------------------------") + print("------------------------------------------------------------------------------------------") # Parse and validate command-line arguments for the RPP test suite def rpp_test_suite_parser_and_validator(): @@ -287,7 +203,7 @@ def rpp_test_suite_parser_and_validator(): dstPath = outFilePath # Validate DST_FOLDER -validate_and_remove_contents(dstPath) +validate_and_remove_files(dstPath) # Enable extglob if os.path.exists(buildFolderPath + "/build"): @@ -299,6 +215,9 @@ def rpp_test_suite_parser_and_validator(): subprocess.run(["cmake", scriptPath], cwd=".") # nosec subprocess.run(["make", "-j16"], cwd=".") # nosec +# List of cases supported +supportedCaseList = ['0', '1', '2', '3', '5'] + print("\n\n\n\n\n") print("##########################################################################################") print("Running all layout Inputs...") @@ -307,8 +226,10 @@ def rpp_test_suite_parser_and_validator(): bitDepths = [0, 2] if testType == 0: for case in caseList: + if case not in supportedCaseList: + continue for layout in range(3): - dstPathTemp, logFileLayout = process_layout(layout, qaMode, case, dstPath) + dstPathTemp, logFileLayout = process_layout(layout, qaMode, case, dstPath, "host", func_group_finder) if qaMode == 0: if not os.path.isdir(dstPathTemp): 
os.mkdir(dstPathTemp) @@ -316,37 +237,20 @@ def rpp_test_suite_parser_and_validator(): run_unit_test(headerPath, dataPath, dstPathTemp, layout, case, numRuns, testType, qaMode, batchSize) else: for case in caseList: + if case not in supportedCaseList: + continue for layout in range(3): - dstPathTemp, logFileLayout = process_layout(layout, qaMode, case, dstPath) + dstPathTemp, logFileLayout = process_layout(layout, qaMode, case, dstPath, "host", func_group_finder) run_performance_test(loggingFolder, logFileLayout, headerPath, dataPath, dstPathTemp, layout, case, numRuns, testType, qaMode, batchSize) # print the results of qa tests -supportedCaseList = ['0', '1', '2', '3', '5'] nonQACaseList = [] # Add cases present in supportedCaseList, but without QA support if qaMode and testType == 0: qaFilePath = os.path.join(outFilePath, "QA_results.txt") checkFile = os.path.isfile(qaFilePath) if checkFile: - f = open(qaFilePath, 'r+') - print("---------------------------------- Results of QA Test - Tensor_voxel_host ----------------------------------\n") - numLines = 0 - numPassed = 0 - for line in f: - sys.stdout.write(line) - numLines += 1 - if "PASSED" in line: - numPassed += 1 - sys.stdout.flush() - resultsInfo = "\n\nFinal Results of Tests:" - resultsInfo += "\n - Total test cases including all subvariants REQUESTED = " + str(numLines) - resultsInfo += "\n - Total test cases including all subvariants PASSED = " + str(numPassed) - resultsInfo += "\n\nGeneral information on Tensor voxel test suite availability:" - resultsInfo += "\n - Total augmentations supported in Tensor test suite = " + str(len(supportedCaseList)) - resultsInfo += "\n - Total augmentations with golden output QA test support = " + str(len(supportedCaseList) - len(nonQACaseList)) - resultsInfo += "\n - Total augmentations without golden ouput QA test support (due to randomization involved) = " + str(len(nonQACaseList)) - f.write(resultsInfo) - print("\n-------------------------------------------------------------------" + resultsInfo + "\n\n-------------------------------------------------------------------") + print_qa_tests_summary(qaFilePath, supportedCaseList, nonQACaseList) layoutDict = {0:"PKD3", 1:"PLN3", 2:"PLN1"} if (testType == 0 and qaMode == 0): # Unit tests @@ -360,63 +264,4 @@ def rpp_test_suite_parser_and_validator(): ] for log_file in log_file_list: - # Opening log file - try: - f = open(log_file,"r") - print("\n\n\nOpened log file -> "+ log_file) - except IOError: - print("Skipping file -> "+ log_file) - continue - - stats = [] - maxVals = [] - minVals = [] - avgVals = [] - functions = [] - frames = [] - prevLine = "" - funcCount = 0 - - # Loop over each line - for line in f: - for functionality_group in functionality_group_list: - if functionality_group in line: - functions.extend([" ", functionality_group, " "]) - frames.extend([" ", " ", " "]) - maxVals.extend([" ", " ", " "]) - minVals.extend([" ", " ", " "]) - avgVals.extend([" ", " ", " "]) - - if "max,min,avg wall times in ms/batch" in line: - split_word_start = "Running " - split_word_end = " " +str(numRuns) - prevLine = prevLine.partition(split_word_start)[2].partition(split_word_end)[0] - if prevLine not in functions: - functions.append(prevLine) - frames.append(numRuns) - split_word_start = "max,min,avg wall times in ms/batch = " - split_word_end = "\n" - stats = line.partition(split_word_start)[2].partition(split_word_end)[0].split(",") - maxVals.append(stats[0]) - minVals.append(stats[1]) - avgVals.append(stats[2]) - funcCount += 1 - - if line != 
"\n": - prevLine = line - - # Print log lengths - print("Functionalities - "+ str(funcCount)) - - # Print summary of log - print("\n\nFunctionality\t\t\t\t\t\tFrames Count\t\tmax(ms/batch)\t\tmin(ms/batch)\t\tavg(ms/batch)\n") - if len(functions) != 0: - maxCharLength = len(max(functions, key = len)) - functions = [x + (' ' * (maxCharLength - len(x))) for x in functions] - for i, func in enumerate(functions): - print(func + "\t\t\t\t\t\t\t\t" + str(frames[i]) + "\t\t" + str(maxVals[i]) + "\t\t" + str(minVals[i]) + "\t\t" + str(avgVals[i])) - else: - print("No variants under this category") - - # Closing log file - f.close() \ No newline at end of file + print_performance_tests_summary(log_file, functionality_group_list, numRuns) diff --git a/utilities/test_suite/common.py b/utilities/test_suite/common.py new file mode 100644 index 000000000..1646cdc94 --- /dev/null +++ b/utilities/test_suite/common.py @@ -0,0 +1,232 @@ +""" +MIT License + +Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+""" +import os +import subprocess # nosec +import argparse +import sys +import datetime +import shutil + +# Checks if the folder path is empty, or is it a root folder, or if it exists, and remove its contents +def validate_and_remove_files(path): + if not path: # check if a string is empty + print("Folder path is empty.") + exit() + + elif path == "/*": # check if the root directory is passed to the function + print("Root folder cannot be deleted.") + exit() + + elif os.path.exists(path): # check if the folder exists + # Get a list of files and directories within the specified path + items = os.listdir(path) + + if items: + # The directory is not empty, delete its contents + for item in items: + item_path = os.path.join(path, item) + if os.path.isfile(item_path): + os.remove(item_path) + elif os.path.isdir(item_path): + shutil.rmtree(item_path) # Delete the directory if it exists + + else: + print("Path is invalid or does not exist.") + exit() + +# Check if the folder is the root folder or exists, and remove the specified subfolders +def validate_and_remove_folders(path, folder): + if path == "/*": # check if the root directory is passed to the function + print("Root folder cannot be deleted.") + exit() + if path and os.path.isdir(path): # checks if directory string is not empty and it exists + output_folders = [folder_name for folder_name in os.listdir(path) if folder_name.startswith(folder)] + + # Loop through each directory and delete it only if it exists + for folder_name in output_folders: + folder_path = os.path.join(path, folder_name) + if os.path.isdir(folder_path): + shutil.rmtree(folder_path) # Delete the directory if it exists + print("Deleted directory:", folder_path) + else: + print("Directory not found:", folder_path) + +# Check if a case file exists and filter its contents based on certain conditions +def case_file_check(CASE_FILE_PATH, TYPE, TENSOR_TYPE_LIST, new_file, d_counter): + try: + case_file = open(CASE_FILE_PATH,'r') + for line in case_file: + print(line) + if not(line.startswith('"Name"')): + if TYPE in TENSOR_TYPE_LIST: + new_file.write(line) + d_counter[TYPE] = d_counter[TYPE] + 1 + case_file.close() + return True + except IOError: + print("Unable to open case results") + return False + + # Generate a directory name based on certain parameters +def directory_name_generator(qaMode, affinity, layoutType, case, path, func_group_finder): + if qaMode == 0: + functionality_group = func_group_finder(int(case)) + dst_folder_temp = f"{path}/rpp_{affinity}_{layoutType}_{functionality_group}" + else: + dst_folder_temp = path + + return dst_folder_temp + +# Process the layout based on the given parameters and generate the directory name and log file layout. 
+def process_layout(layout, qaMode, case, dstPath, backend, func_group_finder): + if layout == 0: + dstPathTemp = directory_name_generator(qaMode, backend, "pkd3", case, dstPath, func_group_finder) + log_file_layout = "pkd3" + elif layout == 1: + dstPathTemp = directory_name_generator(qaMode, backend, "pln3", case, dstPath, func_group_finder) + log_file_layout = "pln3" + elif layout == 2: + dstPathTemp = directory_name_generator(qaMode, backend, "pln1", case, dstPath, func_group_finder) + log_file_layout = "pln1" + + return dstPathTemp, log_file_layout + +# Validate if a path exists and is a directory +def validate_path(input_path): + if not os.path.exists(input_path): + raise ValueError("path " + input_path +" does not exist.") + if not os.path.isdir(input_path): + raise ValueError("path " + input_path + " is not a directory.") + +# Create layout directories within a destination path based on a layout dictionary +def create_layout_directories(dst_path, layout_dict): + for layout in range(3): + current_layout = layout_dict[layout] + try: + os.makedirs(dst_path + '/' + current_layout) + except FileExistsError: + pass + folder_list = [f for f in os.listdir(dst_path) if current_layout.lower() in f] + for folder in folder_list: + os.rename(dst_path + '/' + folder, dst_path + '/' + current_layout + '/' + folder) + +# Read data from the logs generated from rocprof, process the data +# and generate performance reports based on counters and a list of types +def generate_performance_reports(d_counter, TYPE_LIST, RESULTS_DIR): + import pandas as pd + pd.options.display.max_rows = None + # Generate performance report + for TYPE in TYPE_LIST: + print("\n\n\nKernels tested - ", d_counter[TYPE], "\n\n") + df = pd.read_csv(RESULTS_DIR + "/consolidated_results_" + TYPE + ".stats.csv") + df["AverageMs"] = df["AverageNs"] / 1000000 + dfPrint = df.drop(['Percentage'], axis = 1) + dfPrint["HIP Kernel Name"] = dfPrint.iloc[:,0].str.lstrip("Hip_") + dfPrint_noIndices = dfPrint.astype(str) + dfPrint_noIndices.replace(['0', '0.0'], '', inplace = True) + dfPrint_noIndices = dfPrint_noIndices.to_string(index = False) + print(dfPrint_noIndices) + +# Read the data from QA logs, process the data and print the results as a summary +def print_qa_tests_summary(qaFilePath, supportedCaseList, nonQACaseList): + f = open(qaFilePath, 'r+') + numLines = 0 + numPassed = 0 + for line in f: + sys.stdout.write(line) + numLines += 1 + if "PASSED" in line: + numPassed += 1 + sys.stdout.flush() + resultsInfo = "\n\nFinal Results of Tests:" + resultsInfo += "\n - Total test cases including all subvariants REQUESTED = " + str(numLines) + resultsInfo += "\n - Total test cases including all subvariants PASSED = " + str(numPassed) + resultsInfo += "\n\nGeneral information on Tensor voxel test suite availability:" + resultsInfo += "\n - Total augmentations supported in Tensor test suite = " + str(len(supportedCaseList)) + resultsInfo += "\n - Total augmentations with golden output QA test support = " + str(len(supportedCaseList) - len(nonQACaseList)) + resultsInfo += "\n - Total augmentations without golden ouput QA test support (due to randomization involved) = " + str(len(nonQACaseList)) + f.write(resultsInfo) + print("\n-------------------------------------------------------------------" + resultsInfo + "\n\n-------------------------------------------------------------------") + +# Read the data from performance logs, process the data and print the results as a summary +def print_performance_tests_summary(logFile, 
functionalityGroupList, numRuns): + try: + f = open(logFile, "r") + print("\n\n\nOpened log file -> "+ logFile) + except IOError: + print("Skipping file -> "+ logFile) + return + + stats = [] + maxVals = [] + minVals = [] + avgVals = [] + functions = [] + frames = [] + prevLine = "" + funcCount = 0 + + # Loop over each line + for line in f: + for functionalityGroup in functionalityGroupList: + if functionalityGroup in line: + functions.extend([" ", functionalityGroup, " "]) + frames.extend([" ", " ", " "]) + maxVals.extend([" ", " ", " "]) + minVals.extend([" ", " ", " "]) + avgVals.extend([" ", " ", " "]) + + if "max,min,avg wall times in ms/batch" in line: + splitWordStart = "Running " + splitWordEnd = " " +str(numRuns) + prevLine = prevLine.partition(splitWordStart)[2].partition(splitWordEnd)[0] + if prevLine not in functions: + functions.append(prevLine) + frames.append(numRuns) + splitWordStart = "max,min,avg wall times in ms/batch = " + splitWordEnd = "\n" + stats = line.partition(splitWordStart)[2].partition(splitWordEnd)[0].split(",") + maxVals.append(stats[0]) + minVals.append(stats[1]) + avgVals.append(stats[2]) + funcCount += 1 + + if line != "\n": + prevLine = line + + # Print log lengths + print("Functionalities - "+ str(funcCount)) + + # Print summary of log + print("\n\nFunctionality\t\t\t\t\t\tFrames Count\tmax(ms/batch)\t\tmin(ms/batch)\t\tavg(ms/batch)\n") + if len(functions) != 0: + maxCharLength = len(max(functions, key = len)) + functions = [x + (' ' * (maxCharLength - len(x))) for x in functions] + for i, func in enumerate(functions): + print(func + "\t" + str(frames[i]) + "\t\t" + str(maxVals[i]) + "\t" + str(minVals[i]) + "\t" + str(avgVals[i])) + else: + print("No variants under this category") + + # Closing log file + f.close() \ No newline at end of file
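For reference, a minimal sketch of how a runner script consumes the consolidated helpers in utilities/test_suite/common.py after this patch. The import pattern, the supported-case filtering, and the two summary calls mirror the edits to runAudioTests.py and runTests.py above; the output folder, log-file name, and case lists below are placeholders for illustration, not values taken from any specific script.

import os
import sys
sys.dont_write_bytecode = True
sys.path.append(os.path.join(os.path.dirname(__file__), '..'))   # assumes the script lives one level below common.py
from common import *

# Cases this (hypothetical) suite supports, and the subset lacking golden outputs
supportedCaseList = ['0', '1', '2', '3']
nonQACaseList = []

caseList = ['0', '1', '7']
for case in caseList:
    if case not in supportedCaseList:   # unsupported cases are now skipped silently
        continue
    # ... build and run the test binary for this case ...

# QA summary: reads PASSED/FAILED lines from QA_results.txt and appends the totals
qaFilePath = os.path.join("OUTPUT_FOLDER_PLACEHOLDER", "QA_results.txt")
if os.path.isfile(qaFilePath):
    print_qa_tests_summary(qaFilePath, supportedCaseList, nonQACaseList)

# Performance summary: parses "max,min,avg wall times in ms/batch" lines from a raw log
print_performance_tests_summary("LOG_FILE_PLACEHOLDER.txt", "", 100)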
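The performance-summary parser keys off two lines that the test binaries emit: the "Running <func> <numRuns> times ..." banner and the "max,min,avg wall times in ms/batch = ..." statistics line. The sketch below models that contract in Python only to make the parsed format explicit; the timed body is a stand-in, and the exact float formatting is an assumption (the real statistics come from the RPP calls timed with omp_get_wtime in the C++ files above).

import time

def run_perf_case(funcName, numRuns, batchSize, runBatch):
    maxWallTime, minWallTime, avgWallTime = 0.0, 1e9, 0.0
    print("\nRunning %s %d times (each time with a batch size of %d images) and computing mean statistics..." % (funcName, numRuns, batchSize))
    for _ in range(numRuns):
        start = time.time()
        runBatch()                                  # stand-in for the timed augmentation call
        wallTime = (time.time() - start) * 1000     # wall time in ms for this batch
        maxWallTime = max(maxWallTime, wallTime)
        minWallTime = min(minWallTime, wallTime)
        avgWallTime += wallTime
    avgWallTime /= numRuns
    print("max,min,avg wall times in ms/batch = %0.6f,%0.6f,%0.6f" % (maxWallTime, minWallTime, avgWallTime))

run_perf_case("brightness", 100, 3, lambda: time.sleep(0.001))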