diff --git a/include/rppt_tensor_audio_augmentations.h b/include/rppt_tensor_audio_augmentations.h index 138b3baa8..31bb34eff 100644 --- a/include/rppt_tensor_audio_augmentations.h +++ b/include/rppt_tensor_audio_augmentations.h @@ -95,7 +95,22 @@ RppStatus rppt_to_decibels_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_ */ RppStatus rppt_pre_emphasis_filter_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, Rpp32s *srcLengthTensor, Rpp32f *coeffTensor, RpptAudioBorderType borderType, rppHandle_t rppHandle); +/*! \brief Down Mixing augmentation on HOST backend +* \details Down Mixing augmentation for audio data +* \param[in] srcPtr source tensor in HOST memory +* \param[in] srcDescPtr source tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32) +* \param[out] dstPtr destination tensor in HOST memory +* \param[in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32) +* \param[in] srcDimsTensor source audio buffer length and number of channels (1D tensor in HOST memory, of size batchSize * 2) +* \param[in] normalizeWeights bool flag to specify if normalization of weights is needed +* \param[in] rppHandle RPP HOST handle created with \ref rppCreateWithBatchSize() +* \return A \ref RppStatus enumeration. +* \retval RPP_SUCCESS Successful completion. +* \retval RPP_ERROR* Unsuccessful completion. 
+*/
+RppStatus rppt_down_mixing_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, Rpp32s *srcDimsTensor, bool normalizeWeights, rppHandle_t rppHandle);
+
 #ifdef __cplusplus
 }
 #endif
-#endif // RPPT_TENSOR_AUDIO_AUGMENTATIONS_H
\ No newline at end of file
+#endif // RPPT_TENSOR_AUDIO_AUGMENTATIONS_H
diff --git a/src/include/cpu/rpp_cpu_simd.hpp b/src/include/cpu/rpp_cpu_simd.hpp
index 84c898b90..ff30de027 100644
--- a/src/include/cpu/rpp_cpu_simd.hpp
+++ b/src/include/cpu/rpp_cpu_simd.hpp
@@ -2438,6 +2438,31 @@ static inline __m128 log_ps(__m128 x)
     return x;
 }
 
+// Horizontal sum of the 4 packed floats of an SSE register.
+// The lanes are reduced as (x0 + x1) + (x2 + x3).
+static inline Rpp32f rpp_hsum_ps(__m128 x)
+{
+    __m128 shuf = _mm_movehdup_ps(x);          // Contains x3, x3, x1, x1
+    __m128 sums = _mm_add_ps(x, shuf);         // Contains -, x2 + x3, -, x0 + x1
+    shuf = _mm_movehl_ps(shuf, sums);          // Moves x2 + x3 down into the lowest lane
+    sums = _mm_add_ss(sums, shuf);             // Contains -, -, -, x0 + x1 + x2 + x3
+    return _mm_cvtss_f32(sums);
+}
+
+// Horizontal sum of the 8 packed floats of an AVX register.
+// The two 128-bit halves are added first, then the 4 partial sums are reduced.
+static inline Rpp32f rpp_hsum_ps(__m256 x)
+{
+    __m128 p0 = _mm256_extractf128_ps(x, 1);   // Contains x7, x6, x5, x4
+    __m128 p1 = _mm256_castps256_ps128(x);     // Contains x3, x2, x1, x0
+    __m128 sum = _mm_add_ps(p0, p1);           // Contains x3 + x7, x2 + x6, x1 + x5, x0 + x4
+    p1 = _mm_movehl_ps(sum, sum);              // Contains -, -, x3 + x7, x2 + x6
+    sum = _mm_add_ps(sum, p1);                 // Contains -, -, x1 + x3 + x5 + x7, x0 + x2 + x4 + x6
+    p1 = _mm_shuffle_ps(sum, sum, 0x1);        // Contains -, -, -, x1 + x3 + x5 + x7
+    sum = _mm_add_ss(sum, p1);                 // Contains -, -, -, x0 + x1 + x2 + x3 + x4 + x5 + x6 + x7
+    return _mm_cvtss_f32(sum);
+}
+
 static inline void fast_matmul4x4_sse(float *A, float *B, float *C)
 {
     __m128 row1 = _mm_load_ps(&B[0]); // Row 0 of B
diff --git a/src/modules/cpu/host_tensor_audio_augmentations.hpp b/src/modules/cpu/host_tensor_audio_augmentations.hpp
index 7737b38c3..e2edb1afc 100644
--- a/src/modules/cpu/host_tensor_audio_augmentations.hpp
+++ b/src/modules/cpu/host_tensor_audio_augmentations.hpp
@@ -28,5 +28,6 @@ SOFTWARE.
#include "kernel/non_silent_region_detection.hpp"
 #include "kernel/to_decibels.hpp"
 #include "kernel/pre_emphasis_filter.hpp"
+#include "kernel/down_mixing.hpp"
 
 #endif // HOST_TENSOR_AUDIO_AUGMENTATIONS_HPP
\ No newline at end of file
diff --git a/src/modules/cpu/kernel/down_mixing.hpp b/src/modules/cpu/kernel/down_mixing.hpp
new file mode 100644
index 000000000..9cefc64a2
--- /dev/null
+++ b/src/modules/cpu/kernel/down_mixing.hpp
@@ -0,0 +1,127 @@
+/*
+MIT License
+
+Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+*/
+
+#include "rppdefs.h"
+#include <omp.h>
+#include <vector>
+
+// Down-mixes interleaved multi-channel audio to a single channel on the HOST backend.
+// srcDimsTensor holds, for every batch entry, the pair (number of samples, number of channels).
+// Each output value is the weighted sum of the channels of one input frame; every channel uses the weight 1 / channels.
+// When normalizeWeights is set, the weights are additionally rescaled by the reciprocal of their sum.
+// Single-channel entries are copied through unchanged; entries without samples or channels are skipped.
+// Always returns RPP_SUCCESS.
+RppStatus down_mixing_host_tensor(Rpp32f *srcPtr,
+                                  RpptDescPtr srcDescPtr,
+                                  Rpp32f *dstPtr,
+                                  RpptDescPtr dstDescPtr,
+                                  Rpp32s *srcDimsTensor,
+                                  bool normalizeWeights,
+                                  rpp::Handle& handle)
+{
+    Rpp32u numThreads = handle.GetNumThreads();
+
+    omp_set_dynamic(0);
+#pragma omp parallel for num_threads(numThreads)
+    for(int batchCount = 0; batchCount < srcDescPtr->n; batchCount++)
+    {
+        Rpp32f *srcPtrTemp = srcPtr + batchCount * srcDescPtr->strides.nStride;
+        Rpp32f *dstPtrTemp = dstPtr + batchCount * dstDescPtr->strides.nStride;
+
+        Rpp32s samples = srcDimsTensor[batchCount * 2];
+        Rpp32s channels = srcDimsTensor[batchCount * 2 + 1];
+
+        // Nothing to read or write for an empty or invalid entry
+        if((samples < 1) || (channels < 1))
+            continue;
+
+        if(channels == 1)
+        {
+            // No need of downmixing, do a direct memcpy
+            memcpy(dstPtrTemp, srcPtrTemp, static_cast<size_t>(samples) * sizeof(Rpp32f));
+            continue;
+        }
+
+        // Keep the weights private to this loop iteration. Channel counts can differ between batch entries,
+        // so slicing a shared scratch buffer at batchCount * channels lets entries processed in parallel overlap.
+        std::vector<Rpp32f> weights(channels, 1.f / channels);
+        if(normalizeWeights)
+        {
+            // Compute sum of the weights
+            Rpp32f sum = 0.0;
+            for(int i = 0; i < channels; i++)
+                sum += weights[i];
+
+            // Normalize the weights
+            Rpp32f invSum = 1.0 / sum;
+            for(int i = 0; i < channels; i++)
+                weights[i] *= invSum;
+        }
+
+        // if number of channels are greater than or equal to 8, use AVX implementation, otherwise use SSE
+        bool flagAVX = (channels > 7);
+        Rpp32s channelIncrement = flagAVX ? 8 : 4;
+        Rpp32s alignedChannels = (channels / channelIncrement) * channelIncrement;
+
+        // use weights to downmix to mono
+        for(int64_t dstIdx = 0; dstIdx < samples; dstIdx++)
+        {
+            Rpp32s channelLoopCount = 0;
+            Rpp32f mixedSample = 0.0f;
+            if(flagAVX)
+            {
+                __m256 pDst = avx_p0;
+                for(; channelLoopCount < alignedChannels; channelLoopCount += channelIncrement)
+                {
+                    __m256 pWeights = _mm256_loadu_ps(&weights[channelLoopCount]);
+                    __m256 pSrc = _mm256_loadu_ps(srcPtrTemp);
+                    pSrc = _mm256_mul_ps(pSrc, pWeights);
+                    pDst = _mm256_add_ps(pDst, pSrc);
+                    srcPtrTemp += channelIncrement;
+                }
+                mixedSample = rpp_hsum_ps(pDst);
+            }
+            else
+            {
+                __m128 pDst = xmm_p0;
+                for(; channelLoopCount < alignedChannels; channelLoopCount += channelIncrement)
+                {
+                    __m128 pWeights = _mm_loadu_ps(&weights[channelLoopCount]);
+                    __m128 pSrc = _mm_loadu_ps(srcPtrTemp);
+                    pSrc = _mm_mul_ps(pSrc, pWeights);
+                    pDst = _mm_add_ps(pDst, pSrc);
+                    srcPtrTemp += channelIncrement;
+                }
+                mixedSample = rpp_hsum_ps(pDst);
+            }
+
+            // Mix in the channels left over after the vectorized groups
+            for(; channelLoopCount < channels; channelLoopCount++)
+                mixedSample += ((*srcPtrTemp++) * weights[channelLoopCount]);
+            dstPtrTemp[dstIdx] = mixedSample;
+        }
+    }
+
+    return RPP_SUCCESS;
+}
diff --git a/src/modules/rppt_tensor_audio_augmentations.cpp b/src/modules/rppt_tensor_audio_augmentations.cpp
index 23b52bc44..d78b8890a 100644
--- a/src/modules/rppt_tensor_audio_augmentations.cpp
+++ b/src/modules/rppt_tensor_audio_augmentations.cpp
@@ -126,3 +126,32 @@ RppStatus rppt_pre_emphasis_filter_host(RppPtr_t srcPtr,
         return RPP_ERROR_NOT_IMPLEMENTED;
     }
 }
+
+/******************** down_mixing ********************/
+
+// Dispatches to the HOST down mixing kernel; only F32 source and destination tensors are implemented
+RppStatus rppt_down_mixing_host(RppPtr_t srcPtr,
+                                RpptDescPtr srcDescPtr,
+                                RppPtr_t dstPtr,
+                                RpptDescPtr dstDescPtr,
+                                Rpp32s *srcDimsTensor,
+                                bool normalizeWeights,
+                                rppHandle_t rppHandle)
+{
+    if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32))
+    {
+        down_mixing_host_tensor(static_cast<Rpp32f*>(srcPtr),
+                                srcDescPtr,
+                                static_cast<Rpp32f*>(dstPtr),
+                                dstDescPtr,
+                                srcDimsTensor,
+                                normalizeWeights,
+                                rpp::deref(rppHandle));
+
+        return RPP_SUCCESS;
+    }
+    else
+    {
+        return RPP_ERROR_NOT_IMPLEMENTED;
+    }
+}
diff --git a/utilities/test_suite/HOST/CMakeLists.txt
b/utilities/test_suite/HOST/CMakeLists.txt index 9cb32ae78..b7abf5d77 100644 --- a/utilities/test_suite/HOST/CMakeLists.txt +++ b/utilities/test_suite/HOST/CMakeLists.txt @@ -109,7 +109,7 @@ else() endif() if(NOT libsnd_LIBS) - message("-- ${Yellow}Warning: libsndfile must be installed to install ${PROJECT_NAME}/Tensor_voxel_host successfully!${ColourReset}") + message("-- ${Yellow}Warning: libsndfile must be installed to install ${PROJECT_NAME}/Tensor_audio_host successfully!${ColourReset}") else() message("-- ${Green}${PROJECT_NAME} set to build with rpp and libsndfile ${ColourReset}") include_directories(${ROCM_PATH}/include ${ROCM_PATH}/include/rpp /usr/local/include) diff --git a/utilities/test_suite/HOST/Tensor_host_audio.cpp b/utilities/test_suite/HOST/Tensor_host_audio.cpp index 139e7e97e..fe6fa1246 100644 --- a/utilities/test_suite/HOST/Tensor_host_audio.cpp +++ b/utilities/test_suite/HOST/Tensor_host_audio.cpp @@ -197,6 +197,25 @@ int main(int argc, char **argv) break; } + case 3: + { + testCaseName = "down_mixing"; + bool normalizeWeights = false; + Rpp32s srcDimsTensor[batchSize * 2]; + + for (int i = 0, j = 0; i < batchSize; i++, j += 2) + { + srcDimsTensor[j] = srcLengthTensor[i]; + srcDimsTensor[j + 1] = channelsTensor[i]; + dstDims[i].height = srcLengthTensor[i]; + dstDims[i].width = 1; + } + + startWallTime = omp_get_wtime(); + rppt_down_mixing_host(inputf32, srcDescPtr, outputf32, dstDescPtr, srcDimsTensor, normalizeWeights, handle); + + break; + } default: { missingFuncFlag = 1; @@ -263,4 +282,4 @@ int main(int argc, char **argv) free(inputf32); free(outputf32); return 0; -} +} \ No newline at end of file diff --git a/utilities/test_suite/HOST/runAudioTests.py b/utilities/test_suite/HOST/runAudioTests.py index c05a7a011..70ec00026 100644 --- a/utilities/test_suite/HOST/runAudioTests.py +++ b/utilities/test_suite/HOST/runAudioTests.py @@ -37,7 +37,7 @@ outFolderPath = os.getcwd() buildFolderPath = os.getcwd() caseMin = 0 -caseMax = 2 +caseMax = 3 
# Checks if the folder path is empty, or is it a root folder, or if it exists, and remove its contents def validate_and_remove_files(path): @@ -235,13 +235,31 @@ def rpp_test_suite_parser_and_validator(): exit(0) for case in caseList: + if "--input_path" not in sys.argv: + if case == "3": + srcPath = scriptPath + "/../TEST_AUDIO_FILES/three_sample_multi_channel_src1" + else: + srcPath = inFilePath + if int(case) < 0 or int(case) > 3: + print(f"Invalid case number {case}. Case number must be 0-3 range!") + continue + run_unit_test(srcPath, case, numRuns, testType, batchSize, outFilePath) else: for case in caseList: + if "--input_path" not in sys.argv: + if case == "3": + srcPath = scriptPath + "/../TEST_AUDIO_FILES/three_sample_multi_channel_src1" + else: + srcPath = inFilePath + if int(case) < 0 or int(case) > 3: + print(f"Invalid case number {case}. Case number must be 0-3 range!") + continue + run_performance_test(loggingFolder, srcPath, case, numRuns, testType, batchSize, outFilePath) # print the results of qa tests -supportedCaseList = ['0', '1', '2'] +supportedCaseList = ['0', '1', '2', '3'] nonQACaseList = [] # Add cases present in supportedCaseList, but without QA support if testType == 0: diff --git a/utilities/test_suite/REFERENCE_OUTPUTS_AUDIO/down_mixing/down_mixing.bin b/utilities/test_suite/REFERENCE_OUTPUTS_AUDIO/down_mixing/down_mixing.bin new file mode 100644 index 000000000..cb7c8bb84 Binary files /dev/null and b/utilities/test_suite/REFERENCE_OUTPUTS_AUDIO/down_mixing/down_mixing.bin differ diff --git a/utilities/test_suite/TEST_AUDIO_FILES/single_sample_multi_channel_src1/sample.wav b/utilities/test_suite/TEST_AUDIO_FILES/three_sample_multi_channel_src1/sample1.wav similarity index 100% rename from utilities/test_suite/TEST_AUDIO_FILES/single_sample_multi_channel_src1/sample.wav rename to utilities/test_suite/TEST_AUDIO_FILES/three_sample_multi_channel_src1/sample1.wav diff --git 
a/utilities/test_suite/TEST_AUDIO_FILES/three_sample_multi_channel_src1/sample2.wav b/utilities/test_suite/TEST_AUDIO_FILES/three_sample_multi_channel_src1/sample2.wav new file mode 100644 index 000000000..4847f78cd Binary files /dev/null and b/utilities/test_suite/TEST_AUDIO_FILES/three_sample_multi_channel_src1/sample2.wav differ diff --git a/utilities/test_suite/TEST_AUDIO_FILES/three_sample_multi_channel_src1/sample3.wav b/utilities/test_suite/TEST_AUDIO_FILES/three_sample_multi_channel_src1/sample3.wav new file mode 100644 index 000000000..a506e1762 Binary files /dev/null and b/utilities/test_suite/TEST_AUDIO_FILES/three_sample_multi_channel_src1/sample3.wav differ diff --git a/utilities/test_suite/rpp_test_suite_audio.h b/utilities/test_suite/rpp_test_suite_audio.h index 2ac174042..ec962270a 100644 --- a/utilities/test_suite/rpp_test_suite_audio.h +++ b/utilities/test_suite/rpp_test_suite_audio.h @@ -39,6 +39,7 @@ std::map audioAugmentationMap = {0, "non_silent_region_detection"}, {1, "to_decibels"}, {2, "pre_emphasis_filter"}, + {3, "down_mixing"}, }; // Golden outputs for Non Silent Region Detection @@ -137,7 +138,7 @@ void verify_output(Rpp32f *dstPtr, RpptDescPtr dstDescPtr, RpptImagePatchPtr dst // read data from golden outputs Rpp64u oBufferSize = dstDescPtr->n * dstDescPtr->strides.nStride; Rpp32f *refOutput = static_cast(malloc(oBufferSize * sizeof(float))); - string outFile = scriptPath + testCase + "/" + testCase + ".bin"; + string outFile = scriptPath + "/../REFERENCE_OUTPUTS_AUDIO/" + testCase + "/" + testCase + ".bin"; std::fstream fin(outFile, std::ios::in | std::ios::binary); if(fin.is_open()) {