Skip to content

Commit

Permalink
Merge pull request #126 from sampath1117/sr/opt_water_merge
Browse files Browse the repository at this point in the history
Water - HOST Tensor AVX2 Support and Vectorized HIP support
  • Loading branch information
r-abishek committed Jul 18, 2023
2 parents 250d99d + b418e24 commit 9da6be1
Show file tree
Hide file tree
Showing 18 changed files with 1,819 additions and 20 deletions.
23 changes: 23 additions & 0 deletions include/rppt_tensor_effects_augmentations.h
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,29 @@ RppStatus rppt_non_linear_blend_host(RppPtr_t srcPtr1, RppPtr_t srcPtr2, RpptDes
RppStatus rppt_non_linear_blend_gpu(RppPtr_t srcPtr1, RppPtr_t srcPtr2, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, Rpp32f *stdDevTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, rppHandle_t rppHandle);
#endif // GPU_SUPPORT

/******************** water ********************/

// *param[in] srcPtr source tensor memory
// *param[in] srcDescPtr source tensor descriptor
// *param[out] dstPtr destination tensor memory
// *param[in] dstDescPtr destination tensor descriptor
// *param[in] amplitudeXTensor amplitudeX values for water effect (1D tensor of size batchSize)
// *param[in] amplitudeYTensor amplitudeY values for water effect (1D tensor of size batchSize)
// *param[in] freqXTensor freqX values for water effect (1D tensor of size batchSize)
// *param[in] freqYTensor freqY values for water effect (1D tensor of size batchSize)
// *param[in] phaseXTensor amplitudeY values for water effect (1D tensor of size batchSize)
// *param[in] phaseYTensor amplitudeY values for water effect (1D tensor of size batchSize)
// *param[in] roiTensorSrc ROI data for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y))
// *param[in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB)
// *returns a RppStatus enumeration.
// *retval RPP_SUCCESS : succesful completion
// *retval RPP_ERROR : Error

RppStatus rppt_water_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, Rpp32f *amplitudeXTensor, Rpp32f *amplitudeYTensor, Rpp32f *frequencyXTensor, Rpp32f *frequencyYTensor, Rpp32f *phaseXTensor, Rpp32f *phaseYTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, rppHandle_t rppHandle);
#ifdef GPU_SUPPORT
RppStatus rppt_water_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, Rpp32f *amplitudeXTensor, Rpp32f *amplitudeYTensor, Rpp32f *frequencyXTensor, Rpp32f *frequencyYTensor, Rpp32f *phaseXTensor, Rpp32f *phaseYTensor, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, rppHandle_t rppHandle);
#endif // GPU_SUPPORT

#ifdef __cplusplus
}
#endif
Expand Down
14 changes: 14 additions & 0 deletions src/include/cpu/rpp_cpu_common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5044,6 +5044,20 @@ inline void compute_generic_nn_srclocs_and_validate_sse(__m128 pSrcY, __m128 pSr
_mm_storeu_si128((__m128i*) srcLoc, pxSrcLoc);
}

inline void compute_generic_nn_srclocs_and_validate_avx(__m256 pSrcY, __m256 pSrcX, __m256 *pRoiLTRB, __m256 pSrcStrideH, Rpp32s *srcLoc, Rpp32s *invalidLoad, bool hasRGBChannels = false)
{
pSrcY = _mm256_round_ps(pSrcY, (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC)); // Nearest Neighbor Y location vector
pSrcX = _mm256_round_ps(pSrcX, (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC)); // Nearest Neighbor X location vector
_mm256_storeu_si256((__m256i*) invalidLoad, _mm256_cvtps_epi32(_mm256_or_ps( // Vectorized ROI boundary check
_mm256_or_ps(_mm256_cmp_ps(pSrcX, pRoiLTRB[0], _CMP_LT_OQ), _mm256_cmp_ps(pSrcY, pRoiLTRB[1],_CMP_LT_OQ)),
_mm256_or_ps(_mm256_cmp_ps(pSrcX, pRoiLTRB[2], _CMP_GT_OQ), _mm256_cmp_ps(pSrcY, pRoiLTRB[3], _CMP_GT_OQ))
)));
if (hasRGBChannels)
pSrcX = _mm256_mul_ps(pSrcX, avx_p3);
__m256i pxSrcLoc = _mm256_cvtps_epi32(_mm256_fmadd_ps(pSrcY, pSrcStrideH, pSrcX));
_mm256_storeu_si256((__m256i*) srcLoc, pxSrcLoc);
}

template <typename T>
inline void compute_generic_nn_interpolation_pkd3_to_pln3(Rpp32f srcY, Rpp32f srcX, RpptROI *roiLTRB, T *dstPtrTempR, T *dstPtrTempG, T *dstPtrTempB, T *srcPtrChannel, RpptDescPtr srcDescPtr)
{
Expand Down
255 changes: 241 additions & 14 deletions src/include/cpu/rpp_cpu_simd.hpp

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/modules/cpu/host_tensor_effects_augmentations.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,6 @@ THE SOFTWARE.
#include "kernel/noise_shot.hpp"
#include "kernel/noise_gaussian.hpp"
#include "kernel/non_linear_blend.hpp"
#include "kernel/water.hpp"

#endif // HOST_TENSOR_EFFECTS_AUGMENTATIONS_HPP
977 changes: 977 additions & 0 deletions src/modules/cpu/kernel/water.hpp

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/modules/hip/hip_tensor_effects_augmentations.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,6 @@ THE SOFTWARE.
#include "kernel/noise_shot.hpp"
#include "kernel/noise_gaussian.hpp"
#include "kernel/non_linear_blend.hpp"
#include "kernel/water.hpp"

#endif // HIP_TENSOR_EFFECTS_AUGMENTATIONS_HPP
315 changes: 315 additions & 0 deletions src/modules/hip/kernel/water.hpp

Large diffs are not rendered by default.

159 changes: 159 additions & 0 deletions src/modules/rppt_tensor_effects_augmentations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -505,6 +505,95 @@ RppStatus rppt_non_linear_blend_host(RppPtr_t srcPtr1,
return RPP_SUCCESS;
}

/******************** water ********************/

RppStatus rppt_water_host(RppPtr_t srcPtr,
RpptDescPtr srcDescPtr,
RppPtr_t dstPtr,
RpptDescPtr dstDescPtr,
Rpp32f *amplitudeXTensor,
Rpp32f *amplitudeYTensor,
Rpp32f *frequencyXTensor,
Rpp32f *frequencyYTensor,
Rpp32f *phaseXTensor,
Rpp32f *phaseYTensor,
RpptROIPtr roiTensorPtrSrc,
RpptRoiType roiType,
rppHandle_t rppHandle)
{
RppLayoutParams layoutParams = get_layout_params(srcDescPtr->layout, srcDescPtr->c);
if ((srcDescPtr->dataType == RpptDataType::U8) && (dstDescPtr->dataType == RpptDataType::U8))
{
water_u8_u8_host_tensor(static_cast<Rpp8u*>(srcPtr) + srcDescPtr->offsetInBytes,
srcDescPtr,
static_cast<Rpp8u*>(dstPtr) + dstDescPtr->offsetInBytes,
dstDescPtr,
amplitudeXTensor,
amplitudeYTensor,
frequencyXTensor,
frequencyYTensor,
phaseXTensor,
phaseYTensor,
roiTensorPtrSrc,
roiType,
layoutParams,
rpp::deref(rppHandle));
}
else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16))
{
water_f16_f16_host_tensor(reinterpret_cast<Rpp16f*>(static_cast<Rpp8u*>(srcPtr) + srcDescPtr->offsetInBytes),
srcDescPtr,
reinterpret_cast<Rpp16f*>(static_cast<Rpp8u*>(dstPtr) + dstDescPtr->offsetInBytes),
dstDescPtr,
amplitudeXTensor,
amplitudeYTensor,
frequencyXTensor,
frequencyYTensor,
phaseXTensor,
phaseYTensor,
roiTensorPtrSrc,
roiType,
layoutParams,
rpp::deref(rppHandle));
}
else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32))
{
water_f32_f32_host_tensor(reinterpret_cast<Rpp32f*>(static_cast<Rpp8u*>(srcPtr) + srcDescPtr->offsetInBytes),
srcDescPtr,
reinterpret_cast<Rpp32f*>(static_cast<Rpp8u*>(dstPtr) + dstDescPtr->offsetInBytes),
dstDescPtr,
amplitudeXTensor,
amplitudeYTensor,
frequencyXTensor,
frequencyYTensor,
phaseXTensor,
phaseYTensor,
roiTensorPtrSrc,
roiType,
layoutParams,
rpp::deref(rppHandle));
}
else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8))
{
water_i8_i8_host_tensor(static_cast<Rpp8s*>(srcPtr) + srcDescPtr->offsetInBytes,
srcDescPtr,
static_cast<Rpp8s*>(dstPtr) + dstDescPtr->offsetInBytes,
dstDescPtr,
amplitudeXTensor,
amplitudeYTensor,
frequencyXTensor,
frequencyYTensor,
phaseXTensor,
phaseYTensor,
roiTensorPtrSrc,
roiType,
layoutParams,
rpp::deref(rppHandle));
}

return RPP_SUCCESS;
}

/********************************************************************************************************************/
/*********************************************** RPP_GPU_SUPPORT = ON ***********************************************/
/********************************************************************************************************************/
Expand Down Expand Up @@ -1013,4 +1102,74 @@ RppStatus rppt_non_linear_blend_gpu(RppPtr_t srcPtr1,
#endif // backend
}

RppStatus rppt_water_gpu(RppPtr_t srcPtr,
RpptDescPtr srcDescPtr,
RppPtr_t dstPtr,
RpptDescPtr dstDescPtr,
Rpp32f *amplitudeXTensor,
Rpp32f *amplitudeYTensor,
Rpp32f *frequencyXTensor,
Rpp32f *frequencyYTensor,
Rpp32f *phaseXTensor,
Rpp32f *phaseYTensor,
RpptROIPtr roiTensorPtrSrc,
RpptRoiType roiType,
rppHandle_t rppHandle)
{
#ifdef HIP_COMPILE
Rpp32u paramIndex = 0;
copy_param_float(amplitudeXTensor, rpp::deref(rppHandle), paramIndex++);
copy_param_float(amplitudeYTensor, rpp::deref(rppHandle), paramIndex++);
copy_param_float(frequencyXTensor, rpp::deref(rppHandle), paramIndex++);
copy_param_float(frequencyYTensor, rpp::deref(rppHandle), paramIndex++);
copy_param_float(phaseXTensor, rpp::deref(rppHandle), paramIndex++);
copy_param_float(phaseYTensor, rpp::deref(rppHandle), paramIndex);

if ((srcDescPtr->dataType == RpptDataType::U8) && (dstDescPtr->dataType == RpptDataType::U8))
{
hip_exec_water_tensor(static_cast<Rpp8u*>(srcPtr) + srcDescPtr->offsetInBytes,
srcDescPtr,
static_cast<Rpp8u*>(dstPtr) + dstDescPtr->offsetInBytes,
dstDescPtr,
roiTensorPtrSrc,
roiType,
rpp::deref(rppHandle));
}
else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16))
{
hip_exec_water_tensor(reinterpret_cast<half*>(static_cast<Rpp8u*>(srcPtr) + srcDescPtr->offsetInBytes),
srcDescPtr,
reinterpret_cast<half*>(static_cast<Rpp8u*>(dstPtr) + dstDescPtr->offsetInBytes),
dstDescPtr,
roiTensorPtrSrc,
roiType,
rpp::deref(rppHandle));
}
else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32))
{
hip_exec_water_tensor(reinterpret_cast<Rpp32f*>(static_cast<Rpp8u*>(srcPtr) + srcDescPtr->offsetInBytes),
srcDescPtr,
reinterpret_cast<Rpp32f*>(static_cast<Rpp8u*>(dstPtr) + dstDescPtr->offsetInBytes),
dstDescPtr,
roiTensorPtrSrc,
roiType,
rpp::deref(rppHandle));
}
else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8))
{
hip_exec_water_tensor(static_cast<Rpp8s*>(srcPtr) + srcDescPtr->offsetInBytes,
srcDescPtr,
static_cast<Rpp8s*>(dstPtr) + dstDescPtr->offsetInBytes,
dstDescPtr,
roiTensorPtrSrc,
roiType,
rpp::deref(rppHandle));
}

return RPP_SUCCESS;
#elif defined(OCL_COMPILE)
return RPP_ERROR_NOT_IMPLEMENTED;
#endif // backend
}

#endif // GPU_SUPPORT
29 changes: 29 additions & 0 deletions utilities/test_suite/HIP/Tensor_hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -448,6 +448,35 @@ int main(int argc, char **argv)

break;
}
case 29:
{
testCaseName = "water";

Rpp32f amplX[batchSize];
Rpp32f amplY[batchSize];
Rpp32f freqX[batchSize];
Rpp32f freqY[batchSize];
Rpp32f phaseX[batchSize];
Rpp32f phaseY[batchSize];

for (i = 0; i < batchSize; i++)
{
amplX[i] = 2.0f;
amplY[i] = 5.0f;
freqX[i] = 5.8f;
freqY[i] = 1.2f;
phaseX[i] = 10.0f;
phaseY[i] = 15.0f;
}

startWallTime = omp_get_wtime();
if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5)
rppt_water_gpu(d_input, srcDescPtr, d_output, dstDescPtr, amplX, amplY, freqX, freqY, phaseX, phaseY, roiTensorPtrSrc, roiTypeSrc, handle);
else
missingFuncFlag = 1;

break;
}
case 31:
{
testCaseName = "color_cast";
Expand Down
8 changes: 6 additions & 2 deletions utilities/test_suite/HIP/runTests.py
Original file line number Diff line number Diff line change
Expand Up @@ -386,11 +386,15 @@ def rpp_test_suite_parser_and_validator():
print("Unable to open results in " + RESULTS_DIR + "/consolidated_results_" + TYPE + ".stats.csv")

# print the results of qa tests
supportedCaseList = ['0', '1', '2', '4', '13', '31', '34', '36', '37', '38','84']
supportedCaseList = ['0', '1', '2', '4', '13', '29', '31', '34', '36', '37', '38','84']
nonQACaseList = ['84']
supportedCases = 0
for num in caseList:
if num in supportedCaseList:
if qaMode == 1 and num not in nonQACaseList:
supportedCases += 1
elif qaMode == 0 and num in supportedCaseList:
supportedCases += 1

caseInfo = "Tests are run for " + str(supportedCases) + " supported cases out of the " + str(len(caseList)) + " cases requested"
if qaMode and testType == 0:
qaFilePath = os.path.join(outFilePath, "QA_results.txt")
Expand Down
8 changes: 7 additions & 1 deletion utilities/test_suite/HIP/testAllScript.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ cwd=$(pwd)

# <<<<<<<<<<<<<< VALIDATION CHECK FOR FOLDER PATHS >>>>>>>>>>>>>>>>>>>>>>>>>>>>
function VALIDATE_PATH {
if [ -z "$1" ]; then #check if a string is empty
if [ -z "$1" ]; then #check if a string is empty
echo "$1 Folder path is empty."
exit
fi
Expand Down Expand Up @@ -208,6 +208,12 @@ echo "##########################################################################
if [ "$TEST_TYPE" -eq 0 ]; then
for case in ${CASE_LIST[@]};
do
if [ "$QA_MODE" -eq 1 ]; then
if [ "$case" -eq " 84" ]; then
echo "QA tests are not supported for case number $case, since it generates random output"
continue
fi
fi
if [ "$case" -lt "0" ] || [ "$case" -gt " 84" ]; then
echo "Invalid case number $case. case number must be in the 0:84 range!"
continue
Expand Down
29 changes: 29 additions & 0 deletions utilities/test_suite/HOST/Tensor_host.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -434,6 +434,35 @@ int main(int argc, char **argv)

break;
}
case 29:
{
testCaseName = "water";

Rpp32f amplX[batchSize];
Rpp32f amplY[batchSize];
Rpp32f freqX[batchSize];
Rpp32f freqY[batchSize];
Rpp32f phaseX[batchSize];
Rpp32f phaseY[batchSize];

for (i = 0; i < batchSize; i++)
{
amplX[i] = 2.0f;
amplY[i] = 5.0f;
freqX[i] = 5.8f;
freqY[i] = 1.2f;
phaseX[i] = 10.0f;
phaseY[i] = 15.0f;
}

startWallTime = omp_get_wtime();
if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5)
rppt_water_host(input, srcDescPtr, output, dstDescPtr, amplX, amplY, freqX, freqY, phaseX, phaseY, roiTensorPtrSrc, roiTypeSrc, handle);
else
missingFuncFlag = 1;

break;
}
case 31:
{
testCaseName = "color_cast";
Expand Down
8 changes: 6 additions & 2 deletions utilities/test_suite/HOST/runTests.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,11 +162,15 @@ def rpp_test_suite_parser_and_validator():
subprocess.call(["./testAllScript.sh", srcPath1, args.input_path2, str(testType), str(numRuns), str(qaMode), str(decoderType), str(preserveOutput), str(batchSize), " ".join(caseList)]) # nosec

# print the results of qa tests
supportedCaseList = ['0', '1', '2', '4', '13', '31', '34', '36', '37', '38', '84']
supportedCaseList = ['0', '1', '2', '4', '13', '29', '31', '34', '36', '37', '38', '84']
nonQACaseList = ['84']
supportedCases = 0
for num in caseList:
if num in supportedCaseList:
if qaMode == 1 and num not in nonQACaseList:
supportedCases += 1
elif qaMode == 0 and num in supportedCaseList:
supportedCases += 1

caseInfo = "Tests are run for " + str(supportedCases) + " supported cases out of the " + str(len(caseList)) + " cases requested"
if qaMode and testType == 0:
qaFilePath = os.path.join(outFilePath, "QA_results.txt")
Expand Down
8 changes: 7 additions & 1 deletion utilities/test_suite/HOST/testAllScript.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ cwd=$(pwd)

# <<<<<<<<<<<<<< VALIDATION CHECK FOR FOLDER PATHS >>>>>>>>>>>>>>>>>>>>>>>>>>>>
function VALIDATE_PATH {
if [ -z "$1" ]; then #check if a string is empty
if [ -z "$1" ]; then #check if a string is empty
echo "$1 Folder path is empty."
exit
fi
Expand Down Expand Up @@ -200,6 +200,12 @@ echo "##########################################################################
if [ "$TEST_TYPE" -eq 0 ]; then
for case in ${CASE_LIST[@]};
do
if [ "$QA_MODE" -eq 1 ]; then
if [ "$case" -eq " 84" ]; then
echo "QA tests are not supported for case number $case, since it generates random output"
continue
fi
fi
if [ "$case" -lt "0" ] || [ "$case" -gt " 84" ]; then
echo "Invalid case number $case. case number must be in the 0:84 range!"
continue
Expand Down

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions utilities/test_suite/rpp_test_suite_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ std::map<int, string> augmentationMap =
{2, "blend"},
{4, "contrast"},
{13, "exposure"},
{29, "water"},
{31, "color_cast"},
{34, "lut"},
{36, "color_twist"},
Expand Down

0 comments on commit 9da6be1

Please sign in to comment.