diff --git a/include/rppt_tensor_audio_augmentations.h b/include/rppt_tensor_audio_augmentations.h
index 138b3baa8..31bb34eff 100644
--- a/include/rppt_tensor_audio_augmentations.h
+++ b/include/rppt_tensor_audio_augmentations.h
@@ -95,7 +95,22 @@ RppStatus rppt_to_decibels_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_
*/
RppStatus rppt_pre_emphasis_filter_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, Rpp32s *srcLengthTensor, Rpp32f *coeffTensor, RpptAudioBorderType borderType, rppHandle_t rppHandle);
+/*! \brief Down Mixing augmentation on HOST backend
+* \details Down-mixes each audio buffer of the batch to a single channel: every output sample is the sum of the channel values of that sample, each scaled by an equal weight of 1 / channels. Buffers that already have one channel are copied unchanged.
+* \param[in] srcPtr source tensor in HOST memory
+* \param[in] srcDescPtr source tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32)
+* \param[out] dstPtr destination tensor in HOST memory
+* \param[in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32)
+* \param[in] srcDimsTensor source audio buffer length and number of channels (1D tensor in HOST memory, of size batchSize * 2; for batch item i, element 2 * i is the number of samples and element 2 * i + 1 is the number of channels)
+* \param[in] normalizeWeights bool flag to specify if the channel weights must be rescaled so that they sum to 1
+* \param[in] rppHandle RPP HOST handle created with \ref rppCreateWithBatchSize()
+* \return A \ref RppStatus enumeration.
+* \retval RPP_SUCCESS Successful completion.
+* \retval RPP_ERROR* Unsuccessful completion (RPP_ERROR_NOT_IMPLEMENTED when either descriptor's dataType is not F32).
+*/
+RppStatus rppt_down_mixing_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, Rpp32s *srcDimsTensor, bool normalizeWeights, rppHandle_t rppHandle);
+
#ifdef __cplusplus
}
#endif
-#endif // RPPT_TENSOR_AUDIO_AUGMENTATIONS_H
\ No newline at end of file
+#endif // RPPT_TENSOR_AUDIO_AUGMENTATIONS_H
diff --git a/src/include/cpu/rpp_cpu_simd.hpp b/src/include/cpu/rpp_cpu_simd.hpp
index 84c898b90..ff30de027 100644
--- a/src/include/cpu/rpp_cpu_simd.hpp
+++ b/src/include/cpu/rpp_cpu_simd.hpp
@@ -2438,6 +2438,29 @@ static inline __m128 log_ps(__m128 x)
return x;
}
+// Returns the horizontal sum of the four packed floats in x, evaluated as (x0 + x1) + (x2 + x3)
+inline Rpp32f rpp_hsum_ps(__m128 x)
+{
+    __m128 oddLanes = _mm_movehdup_ps(x);                       // duplicate odd lanes: x3, x3, x1, x1
+    __m128 pairSums = _mm_add_ps(x, oddLanes);                  // lane 0 = x0 + x1, lane 2 = x2 + x3
+    __m128 upperPair = _mm_movehl_ps(oddLanes, pairSums);       // bring x2 + x3 down to lane 0
+    return _mm_cvtss_f32(_mm_add_ss(pairSums, upperPair));      // lane 0 = (x0 + x1) + (x2 + x3)
+}
+
+// Returns the horizontal sum of the eight packed floats in x
+inline Rpp32f rpp_hsum_ps(__m256 x)
+{
+    __m128 upperHalf = _mm256_extractf128_ps(x, 1);             // Contains x7, x6, x5, x4
+    __m128 lowerHalf = _mm256_castps256_ps128(x);               // Contains x3, x2, x1, x0
+    __m128 quadSums = _mm_add_ps(upperHalf, lowerHalf);         // Contains x3 + x7, x2 + x6, x1 + x5, x0 + x4
+    __m128 quadHigh = _mm_movehl_ps(quadSums, quadSums);        // Low lanes contain x3 + x7, x2 + x6
+    __m128 pairSums = _mm_add_ps(quadSums, quadHigh);           // Low lanes contain x1 + x3 + x5 + x7, x0 + x2 + x4 + x6
+    __m128 oddSum = _mm_shuffle_ps(pairSums, pairSums, 0x1);    // Lane 0 contains x1 + x3 + x5 + x7
+    return _mm_cvtss_f32(_mm_add_ss(pairSums, oddSum));         // Lane 0 contains x0 + x1 + x2 + x3 + x4 + x5 + x6 + x7
+}
+
static inline void fast_matmul4x4_sse(float *A, float *B, float *C)
{
__m128 row1 = _mm_load_ps(&B[0]); // Row 0 of B
diff --git a/src/modules/cpu/host_tensor_audio_augmentations.hpp b/src/modules/cpu/host_tensor_audio_augmentations.hpp
index 7737b38c3..e2edb1afc 100644
--- a/src/modules/cpu/host_tensor_audio_augmentations.hpp
+++ b/src/modules/cpu/host_tensor_audio_augmentations.hpp
@@ -28,5 +28,6 @@ SOFTWARE.
#include "kernel/non_silent_region_detection.hpp"
#include "kernel/to_decibels.hpp"
#include "kernel/pre_emphasis_filter.hpp"
+#include "kernel/down_mixing.hpp"
#endif // HOST_TENSOR_AUDIO_AUGMENTATIONS_HPP
\ No newline at end of file
diff --git a/src/modules/cpu/kernel/down_mixing.hpp b/src/modules/cpu/kernel/down_mixing.hpp
new file mode 100644
index 000000000..9cefc64a2
--- /dev/null
+++ b/src/modules/cpu/kernel/down_mixing.hpp
@@ -0,0 +1,122 @@
+/*
+MIT License
+
+Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+*/
+
+#include "rppdefs.h"
+#include <omp.h>
+
+// Down-mixes each audio buffer of the batch to a single channel on the HOST.
+// For batch item i, srcDimsTensor[2 * i] is the number of samples and srcDimsTensor[2 * i + 1] the
+// number of channels; the source is read as `channels` consecutive floats per sample.
+// Every output sample is the sum of its channel values, each scaled by the same weight 1 / channels
+// (rescaled so that the weights sum to 1 when normalizeWeights is set). Single-channel buffers are copied.
+// Batch items are processed in parallel using handle.GetNumThreads() OpenMP threads.
+// Always returns RPP_SUCCESS.
+RppStatus down_mixing_host_tensor(Rpp32f *srcPtr,
+                                  RpptDescPtr srcDescPtr,
+                                  Rpp32f *dstPtr,
+                                  RpptDescPtr dstDescPtr,
+                                  Rpp32s *srcDimsTensor,
+                                  bool normalizeWeights,
+                                  rpp::Handle& handle)
+{
+    Rpp32u numThreads = handle.GetNumThreads();
+
+    omp_set_dynamic(0);
+#pragma omp parallel for num_threads(numThreads)
+    for(int batchCount = 0; batchCount < srcDescPtr->n; batchCount++)
+    {
+        Rpp32f *srcPtrTemp = srcPtr + batchCount * srcDescPtr->strides.nStride;
+        Rpp32f *dstPtrTemp = dstPtr + batchCount * dstDescPtr->strides.nStride;
+
+        Rpp32s samples = srcDimsTensor[batchCount * 2];
+        Rpp32s channels = srcDimsTensor[batchCount * 2 + 1];
+
+        if(channels == 1)
+        {
+            // No need of downmixing, do a direct memcpy
+            memcpy(dstPtrTemp, srcPtrTemp, (size_t)(samples * sizeof(Rpp32f)));
+        }
+        else
+        {
+            // Every channel uses the same weight, so it is held in a thread-private scalar instead of
+            // the handle's shared scratch memory: concurrently processed batch items with different
+            // channel counts can then never overwrite each other's weights.
+            Rpp32f weight = 1.f / channels;
+
+            if(normalizeWeights)
+            {
+                // Compute sum of the weights (one identical weight per channel)
+                Rpp32f sum = 0.0;
+                for(int i = 0; i < channels; i++)
+                    sum += weight;
+
+                // Normalize the weight
+                Rpp32f invSum = 1.0 / sum;
+                weight *= invSum;
+            }
+
+            // if number of channels are greater than or equal to 8, use AVX implementation
+            bool flagAVX = (channels > 7);
+            Rpp32s channelIncrement = flagAVX ? 8 : 4;
+            Rpp32s alignedChannels = (channels / channelIncrement) * channelIncrement;
+
+            // Broadcast the weight once; it does not change across samples
+            __m256 pWeightsAvx = _mm256_set1_ps(weight);
+            __m128 pWeightsSse = _mm_set1_ps(weight);
+
+            // use the weight to downmix to mono
+            for(int64_t dstIdx = 0; dstIdx < samples; dstIdx++)
+            {
+                Rpp32s channelLoopCount = 0;
+                if(flagAVX)
+                {
+                    __m256 pDst = avx_p0;
+                    for(; channelLoopCount < alignedChannels; channelLoopCount += channelIncrement)
+                    {
+                        __m256 pSrc = _mm256_loadu_ps(srcPtrTemp);
+                        pSrc = _mm256_mul_ps(pSrc, pWeightsAvx);
+                        pDst = _mm256_add_ps(pDst, pSrc);
+                        srcPtrTemp += channelIncrement;
+                    }
+                    dstPtrTemp[dstIdx] = rpp_hsum_ps(pDst);
+                }
+                else
+                {
+                    __m128 pDst = xmm_p0;
+                    for(; channelLoopCount < alignedChannels; channelLoopCount += channelIncrement)
+                    {
+                        __m128 pSrc = _mm_loadu_ps(srcPtrTemp);
+                        pSrc = _mm_mul_ps(pSrc, pWeightsSse);
+                        pDst = _mm_add_ps(pDst, pSrc);
+                        srcPtrTemp += channelIncrement;
+                    }
+                    dstPtrTemp[dstIdx] = rpp_hsum_ps(pDst);
+                }
+
+                // Remaining channels that do not fill a whole vector
+                for(; channelLoopCount < channels; channelLoopCount++)
+                    dstPtrTemp[dstIdx] += ((*srcPtrTemp++) * weight);
+            }
+        }
+    }
+
+    return RPP_SUCCESS;
+}
diff --git a/src/modules/rppt_tensor_audio_augmentations.cpp b/src/modules/rppt_tensor_audio_augmentations.cpp
index 23b52bc44..d78b8890a 100644
--- a/src/modules/rppt_tensor_audio_augmentations.cpp
+++ b/src/modules/rppt_tensor_audio_augmentations.cpp
@@ -126,3 +126,31 @@ RppStatus rppt_pre_emphasis_filter_host(RppPtr_t srcPtr,
return RPP_ERROR_NOT_IMPLEMENTED;
}
}
+
+/******************** down_mixing ********************/
+
+// HOST entry point for down mixing.
+// Dispatches to the F32 kernel when both the source and destination descriptors are F32 and
+// returns the kernel's status; any other data type combination yields RPP_ERROR_NOT_IMPLEMENTED.
+RppStatus rppt_down_mixing_host(RppPtr_t srcPtr,
+                                RpptDescPtr srcDescPtr,
+                                RppPtr_t dstPtr,
+                                RpptDescPtr dstDescPtr,
+                                Rpp32s *srcDimsTensor,
+                                bool normalizeWeights,
+                                rppHandle_t rppHandle)
+{
+    if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32))
+    {
+        // The kernel takes typed float buffers, so the opaque pointers are cast to Rpp32f*
+        return down_mixing_host_tensor(static_cast<Rpp32f*>(srcPtr),
+                                       srcDescPtr,
+                                       static_cast<Rpp32f*>(dstPtr),
+                                       dstDescPtr,
+                                       srcDimsTensor,
+                                       normalizeWeights,
+                                       rpp::deref(rppHandle));
+    }
+    else
+    {
+        return RPP_ERROR_NOT_IMPLEMENTED;
+    }
+}
diff --git a/utilities/test_suite/HOST/CMakeLists.txt b/utilities/test_suite/HOST/CMakeLists.txt
index 9cb32ae78..b7abf5d77 100644
--- a/utilities/test_suite/HOST/CMakeLists.txt
+++ b/utilities/test_suite/HOST/CMakeLists.txt
@@ -109,7 +109,7 @@ else()
endif()
if(NOT libsnd_LIBS)
- message("-- ${Yellow}Warning: libsndfile must be installed to install ${PROJECT_NAME}/Tensor_voxel_host successfully!${ColourReset}")
+ message("-- ${Yellow}Warning: libsndfile must be installed to install ${PROJECT_NAME}/Tensor_audio_host successfully!${ColourReset}")
else()
message("-- ${Green}${PROJECT_NAME} set to build with rpp and libsndfile ${ColourReset}")
include_directories(${ROCM_PATH}/include ${ROCM_PATH}/include/rpp /usr/local/include)
diff --git a/utilities/test_suite/HOST/Tensor_host_audio.cpp b/utilities/test_suite/HOST/Tensor_host_audio.cpp
index 139e7e97e..fe6fa1246 100644
--- a/utilities/test_suite/HOST/Tensor_host_audio.cpp
+++ b/utilities/test_suite/HOST/Tensor_host_audio.cpp
@@ -197,6 +197,25 @@ int main(int argc, char **argv)
break;
}
+ // Case 3: down_mixing - mixes each multi-channel input buffer down to a single channel
+ case 3:
+ {
+ testCaseName = "down_mixing";
+ bool normalizeWeights = false;
+ // Two entries per batch item: [2 * i] = number of samples, [2 * i + 1] = number of channels
+ // NOTE(review): if batchSize is not a compile-time constant this is a variable-length array,
+ // which is a compiler extension in C++ - confirm this is intended
+ Rpp32s srcDimsTensor[batchSize * 2];
+
+ for (int i = 0, j = 0; i < batchSize; i++, j += 2)
+ {
+ srcDimsTensor[j] = srcLengthTensor[i];
+ srcDimsTensor[j + 1] = channelsTensor[i];
+ // The output keeps the source sample count but has a width of 1
+ dstDims[i].height = srcLengthTensor[i];
+ dstDims[i].width = 1;
+ }
+
+ // Start timing immediately before the call under test
+ startWallTime = omp_get_wtime();
+ rppt_down_mixing_host(inputf32, srcDescPtr, outputf32, dstDescPtr, srcDimsTensor, normalizeWeights, handle);
+
+ break;
+ }
default:
{
missingFuncFlag = 1;
@@ -263,4 +282,4 @@ int main(int argc, char **argv)
free(inputf32);
free(outputf32);
return 0;
-}
+}
\ No newline at end of file
diff --git a/utilities/test_suite/HOST/runAudioTests.py b/utilities/test_suite/HOST/runAudioTests.py
index c05a7a011..70ec00026 100644
--- a/utilities/test_suite/HOST/runAudioTests.py
+++ b/utilities/test_suite/HOST/runAudioTests.py
@@ -37,7 +37,7 @@
outFolderPath = os.getcwd()
buildFolderPath = os.getcwd()
caseMin = 0
-caseMax = 2
+caseMax = 3
# Checks if the folder path is empty, or is it a root folder, or if it exists, and remove its contents
def validate_and_remove_files(path):
@@ -235,13 +235,31 @@ def rpp_test_suite_parser_and_validator():
exit(0)
for case in caseList:
+        # Without an explicit --input_path, case 3 (down_mixing) uses the multi-channel samples
+        # and every other case uses the default input folder
+        if "--input_path" not in sys.argv:
+            if case == "3":
+                srcPath = scriptPath + "/../TEST_AUDIO_FILES/three_sample_multi_channel_src1"
+            else:
+                srcPath = inFilePath
+        # Skip case numbers outside the supported range; the bounds come from caseMin/caseMax
+        # so this check does not need editing when a new case is added
+        if int(case) < caseMin or int(case) > caseMax:
+            print(f"Invalid case number {case}. Case number must be {caseMin}-{caseMax} range!")
+            continue
+
run_unit_test(srcPath, case, numRuns, testType, batchSize, outFilePath)
else:
for case in caseList:
+        # Without an explicit --input_path, case 3 (down_mixing) uses the multi-channel samples
+        # and every other case uses the default input folder
+        if "--input_path" not in sys.argv:
+            if case == "3":
+                srcPath = scriptPath + "/../TEST_AUDIO_FILES/three_sample_multi_channel_src1"
+            else:
+                srcPath = inFilePath
+        # Skip case numbers outside the supported range; the bounds come from caseMin/caseMax
+        # so this check does not need editing when a new case is added
+        if int(case) < caseMin or int(case) > caseMax:
+            print(f"Invalid case number {case}. Case number must be {caseMin}-{caseMax} range!")
+            continue
+
run_performance_test(loggingFolder, srcPath, case, numRuns, testType, batchSize, outFilePath)
# print the results of qa tests
-supportedCaseList = ['0', '1', '2']
+supportedCaseList = ['0', '1', '2', '3']
nonQACaseList = [] # Add cases present in supportedCaseList, but without QA support
if testType == 0:
diff --git a/utilities/test_suite/REFERENCE_OUTPUTS_AUDIO/down_mixing/down_mixing.bin b/utilities/test_suite/REFERENCE_OUTPUTS_AUDIO/down_mixing/down_mixing.bin
new file mode 100644
index 000000000..cb7c8bb84
Binary files /dev/null and b/utilities/test_suite/REFERENCE_OUTPUTS_AUDIO/down_mixing/down_mixing.bin differ
diff --git a/utilities/test_suite/TEST_AUDIO_FILES/single_sample_multi_channel_src1/sample.wav b/utilities/test_suite/TEST_AUDIO_FILES/three_sample_multi_channel_src1/sample1.wav
similarity index 100%
rename from utilities/test_suite/TEST_AUDIO_FILES/single_sample_multi_channel_src1/sample.wav
rename to utilities/test_suite/TEST_AUDIO_FILES/three_sample_multi_channel_src1/sample1.wav
diff --git a/utilities/test_suite/TEST_AUDIO_FILES/three_sample_multi_channel_src1/sample2.wav b/utilities/test_suite/TEST_AUDIO_FILES/three_sample_multi_channel_src1/sample2.wav
new file mode 100644
index 000000000..4847f78cd
Binary files /dev/null and b/utilities/test_suite/TEST_AUDIO_FILES/three_sample_multi_channel_src1/sample2.wav differ
diff --git a/utilities/test_suite/TEST_AUDIO_FILES/three_sample_multi_channel_src1/sample3.wav b/utilities/test_suite/TEST_AUDIO_FILES/three_sample_multi_channel_src1/sample3.wav
new file mode 100644
index 000000000..a506e1762
Binary files /dev/null and b/utilities/test_suite/TEST_AUDIO_FILES/three_sample_multi_channel_src1/sample3.wav differ
diff --git a/utilities/test_suite/rpp_test_suite_audio.h b/utilities/test_suite/rpp_test_suite_audio.h
index 2ac174042..ec962270a 100644
--- a/utilities/test_suite/rpp_test_suite_audio.h
+++ b/utilities/test_suite/rpp_test_suite_audio.h
@@ -39,6 +39,7 @@ std::map audioAugmentationMap =
{0, "non_silent_region_detection"},
{1, "to_decibels"},
{2, "pre_emphasis_filter"},
+ {3, "down_mixing"},
};
// Golden outputs for Non Silent Region Detection
@@ -137,7 +138,7 @@ void verify_output(Rpp32f *dstPtr, RpptDescPtr dstDescPtr, RpptImagePatchPtr dst
// read data from golden outputs
Rpp64u oBufferSize = dstDescPtr->n * dstDescPtr->strides.nStride;
Rpp32f *refOutput = static_cast(malloc(oBufferSize * sizeof(float)));
- string outFile = scriptPath + testCase + "/" + testCase + ".bin";
+ string outFile = scriptPath + "/../REFERENCE_OUTPUTS_AUDIO/" + testCase + "/" + testCase + ".bin";
std::fstream fin(outFile, std::ios::in | std::ios::binary);
if(fin.is_open())
{