r-abishek · sampath1117 · Mar 6, 2024 · Mar 12, 2024 · Mar 12, 2024 · Mar 19, 2024
diff --git a/include/rppt_tensor_audio_augmentations.h b/include/rppt_tensor_audio_augmentations.h
@@ -46,16 +46,16 @@ extern "C" {
  * \details Non Silent Region Detection augmentation for 1D audio buffer
             \n Finds the starting index and length of non silent region in the audio buffer by comparing the
             calculated short-term power with cutoff value passed
- * \param[in] srcPtr source tensor in HOST memory
- * \param[in] srcDescPtr source tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32)
- * \param[in] srcLengthTensor source audio buffer length (1D tensor in HOST memory, of size batchSize)
- * \param[out] detectedIndexTensor beginning index of non silent region (1D tensor in HOST memory, of size batchSize)
- * \param[out] detectionLengthTensor length of non silent region  (1D tensor in HOST memory, of size batchSize)
- * \param[in] cutOffDB cutOff in dB below which the signal is considered silent
- * \param[in] windowLength window length used for computing short-term power of the signal
- * \param[in] referencePower reference power that is used to convert the signal to dB
- * \param[in] resetInterval number of samples after which the moving mean average is recalculated to avoid precision loss
- * \param[in] rppHandle RPP HOST handle created with <tt>\ref rppCreateWithBatchSize()</tt>
+ * \param [in] srcPtr source tensor in HOST memory
+ * \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32)
+ * \param [in] srcLengthTensor source audio buffer length (1D tensor in HOST memory, of size batchSize)
+ * \param [out] detectedIndexTensor beginning index of non silent region (1D tensor in HOST memory, of size batchSize)
+ * \param [out] detectionLengthTensor length of non silent region  (1D tensor in HOST memory, of size batchSize)
+ * \param [in] cutOffDB cutOff in dB below which the signal is considered silent
+ * \param [in] windowLength window length used for computing short-term power of the signal
+ * \param [in] referencePower reference power that is used to convert the signal to dB
+ * \param [in] resetInterval number of samples after which the moving mean average is recalculated to avoid precision loss
+ * \param [in] rppHandle RPP HOST handle created with <tt>\ref rppCreateWithBatchSize()</tt>
  * \return A <tt> \ref RppStatus</tt> enumeration.
  * \retval RPP_SUCCESS Successful completion.
  * \retval RPP_ERROR* Unsuccessful completion.
@@ -64,15 +64,15 @@ RppStatus rppt_non_silent_region_detection_host(RppPtr_t srcPtr, RpptDescPtr src
 
 /*! \brief To Decibels augmentation on HOST backend
  * \details To Decibels augmentation for 1D audio buffer converts magnitude values to decibel values
- * \param[in] srcPtr source tensor in HOST memory
- * \param[in] srcDescPtr source tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32)
- * \param[out] dstPtr destination tensor in HOST memory
- * \param[in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32)
- * \param[in] srcDims source tensor sizes for each element in batch (2D tensor in HOST memory, of size batchSize * 2)
- * \param[in] cutOffDB  minimum or cut-off ratio in dB
- * \param[in] multiplier factor by which the logarithm is multiplied
- * \param[in] referenceMagnitude Reference magnitude if not provided maximum value of input used as reference
- * \param[in] rppHandle RPP HOST handle created with <tt>\ref rppCreateWithBatchSize()</tt>
+ * \param [in] srcPtr source tensor in HOST memory
+ * \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32)
+ * \param [out] dstPtr destination tensor in HOST memory
+ * \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32)
+ * \param [in] srcDims source tensor sizes for each element in batch (2D tensor in HOST memory, of size batchSize * 2)
+ * \param [in] cutOffDB  minimum or cut-off ratio in dB
+ * \param [in] multiplier factor by which the logarithm is multiplied
+ * \param [in] referenceMagnitude Reference magnitude if not provided maximum value of input used as reference
+ * \param [in] rppHandle RPP HOST handle created with <tt>\ref rppCreateWithBatchSize()</tt>
  * \return A <tt> \ref RppStatus</tt> enumeration.
  * \retval RPP_SUCCESS Successful completion.
  * \retval RPP_ERROR* Unsuccessful completion.
@@ -81,14 +81,14 @@ RppStatus rppt_to_decibels_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_
 
 /*! \brief Pre Emphasis Filter augmentation on HOST backend
  * \details Pre Emphasis Filter augmentation for audio data
- * \param[in] srcPtr source tensor in HOST memory
- * \param[in] srcDescPtr source tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32)
- * \param[out] dstPtr destination tensor in HOST memory
- * \param[in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32)
- * \param[in] srcLengthTensor source audio buffer length (1D tensor in HOST memory, of size batchSize)
- * \param[in] coeffTensor preemphasis coefficient (1D tensor in HOST memory, of size batchSize)
- * \param[in] borderType border value policy
- * \param[in] rppHandle RPP HOST handle created with <tt>\ref rppCreateWithBatchSize()</tt>
+ * \param [in] srcPtr source tensor in HOST memory
+ * \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32)
+ * \param [out] dstPtr destination tensor in HOST memory
+ * \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32)
+ * \param [in] srcLengthTensor source audio buffer length (1D tensor in HOST memory, of size batchSize)
+ * \param [in] coeffTensor preemphasis coefficient (1D tensor in HOST memory, of size batchSize)
+ * \param [in] borderType border value policy
+ * \param [in] rppHandle RPP HOST handle created with <tt>\ref rppCreateWithBatchSize()</tt>
  * \return A <tt> \ref RppStatus</tt> enumeration.
  * \retval RPP_SUCCESS Successful completion.
  * \retval RPP_ERROR* Unsuccessful completion.
@@ -97,19 +97,36 @@ RppStatus rppt_pre_emphasis_filter_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr,
 
 /*! \brief Down Mixing augmentation on HOST backend
 * \details Down Mixing augmentation for audio data
-* \param[in] srcPtr source tensor in HOST memory
-* \param[in] srcDescPtr source tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32)
-* \param[out] dstPtr destination tensor in HOST memory
-* \param[in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32)
-* \param[in] srcDimsTensor source audio buffer length and number of channels (1D tensor in HOST memory, of size batchSize * 2)
-* \param[in] normalizeWeights bool flag to specify if normalization of weights is needed
-* \param[in] rppHandle RPP HOST handle created with <tt>\ref rppCreateWithBatchSize()</tt>
+* \param [in] srcPtr source tensor in HOST memory
+* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32)
+* \param [out] dstPtr destination tensor in HOST memory
+* \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32)
+* \param [in] srcDimsTensor source audio buffer length and number of channels (1D tensor in HOST memory, of size batchSize * 2)
+* \param [in] normalizeWeights bool flag to specify if normalization of weights is needed
+* \param [in] rppHandle RPP HOST handle created with <tt>\ref rppCreateWithBatchSize()</tt>
 * \return A <tt> \ref RppStatus</tt> enumeration.
 * \retval RPP_SUCCESS Successful completion.
 * \retval RPP_ERROR* Unsuccessful completion.
 */
 RppStatus rppt_down_mixing_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, Rpp32s *srcDimsTensor, bool normalizeWeights, rppHandle_t rppHandle);
 
+#ifdef GPU_SUPPORT
+/*! \brief Down Mixing augmentation on HIP backend
+* \details Down Mixing augmentation for audio data
+* \param [in] srcPtr source tensor in HIP memory
+* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32)
+* \param [out] dstPtr destination tensor in HIP memory
+* \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 2, offsetInBytes >= 0, dataType = F32)
+* \param [in] srcDimsTensor source audio buffer length and number of channels (1D tensor in HIP/Pinned memory, of size batchSize * 2)
+* \param [in] normalizeWeights bool flag to specify if normalization of weights is needed
+* \param [in] rppHandle RPP HIP handle created with <tt>\ref rppCreateWithStreamAndBatchSize()</tt>
+* \return A <tt> \ref RppStatus</tt> enumeration.
+* \retval RPP_SUCCESS Successful completion.
+* \retval RPP_ERROR* Unsuccessful completion.
+*/
+RppStatus rppt_down_mixing_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, Rpp32s *srcDimsTensor, bool normalizeWeights, rppHandle_t rppHandle);
+#endif // GPU_SUPPORT
+
 /*! \brief Produces a spectrogram from a 1D audio buffer on HOST backend
  * \details Spectrogram for 1D audio buffer
  * \param [in] srcPtr source tensor in HOST memory
@@ -153,15 +170,15 @@ RppStatus rppt_mel_filter_bank_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, Rpp
 
 /*! \brief Resample augmentation on HOST backend
 * \details Resample augmentation for audio data
-* \param[in] srcPtr source tensor in HOST memory
-* \param[in] srcDescPtr source tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32)
-* \param[out] dstPtr destination tensor in HOST memory
-* \param[in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32)
-* \param[in] inRate Input sampling rate (1D tensor in HOST memory, of size batchSize)
-* \param[in] outRate Output sampling rate (1D tensor in HOST memory, of size batchSize)
-* \param[in] srcDimsTensor source audio buffer length and number of channels (1D tensor in HOST memory, of size batchSize * 2)
-* \param[in] window Resampling window (struct of type RpptRpptResamplingWindow)
-* \param[in] rppHandle RPP HOST handle created with <tt>\ref rppCreateWithBatchSize()</tt>
+* \param [in] srcPtr source tensor in HOST memory
+* \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32)
+* \param [out] dstPtr destination tensor in HOST memory
+* \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32)
+* \param [in] inRate Input sampling rate (1D tensor in HOST memory, of size batchSize)
+* \param [in] outRate Output sampling rate (1D tensor in HOST memory, of size batchSize)
+* \param [in] srcDimsTensor source audio buffer length and number of channels (1D tensor in HOST memory, of size batchSize * 2)
+* \param [in] window Resampling window (struct of type RpptRpptResamplingWindow)
+* \param [in] rppHandle RPP HOST handle created with <tt>\ref rppCreateWithBatchSize()</tt>
 * \return A <tt> \ref RppStatus</tt> enumeration.
 * \retval RPP_SUCCESS Successful completion.
 * \retval RPP_ERROR* Unsuccessful completion.

diff --git a/src/modules/hip/hip_tensor_audio_augmentations.hpp b/src/modules/hip/hip_tensor_audio_augmentations.hpp
@@ -0,0 +1,30 @@
+/*
+MIT License
+
+Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+*/
+
+#ifndef HIP_TENSOR_AUDIO_AUGMENTATIONS_HPP
+#define HIP_TENSOR_AUDIO_AUGMENTATIONS_HPP
+
+#include "kernel/down_mixing.hpp"
+
+#endif // HIP_TENSOR_AUDIO_AUGMENTATIONS_HPP
diff --git a/src/modules/hip/kernel/down_mixing.hpp b/src/modules/hip/kernel/down_mixing.hpp
@@ -0,0 +1,73 @@
+#include <hip/hip_runtime.h>
+#include "rpp_hip_common.hpp"
+
+__global__ void down_mixing_hip_tensor(float *srcPtr,
+                                       uint srcStride,
+                                       float *dstPtr,
+                                       uint dstStride,
+                                       int2 *srcDimsTensor)
+
+{
+    int id_x = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x;
+    int id_z = hipBlockIdx_z * hipBlockDim_z + hipThreadIdx_z;
+    int srcLength = srcDimsTensor[id_z].x;
+    int channels = srcDimsTensor[id_z].y;
+
+    if (id_x >= srcLength)
+        return;
+
+    float nomalizedWeight = 1.f / channels;
+    float outVal = 0.0f;
+    uint srcIdx = id_z * srcStride + id_x * channels;
+    int i = 0;
+    int alignedChannels = (channels / 8) * 8;
+    // if number of channels is a multiple of 8, do 8 pixel load till alignedChannels value
+    if (alignedChannels)
+    {
+        d_float8 outVal_f8;
+        outVal_f8.f4[0] = static_cast<float4>(0.0f);
+        outVal_f8.f4[1] = outVal_f8.f4[0];
+        float4 normalizedWeight_f4 = static_cast<float4>(nomalizedWeight);
+        for(; i < alignedChannels; i += 8, srcIdx += 8)
+        {
+            d_float8 src_f8;
+            rpp_hip_load8_and_unpack_to_float8(srcPtr + srcIdx, &src_f8);
+            rpp_hip_math_multiply8_const(&src_f8, &src_f8, normalizedWeight_f4);
+            rpp_hip_math_add8(&outVal_f8, &src_f8, &outVal_f8);
+        }
+        outVal_f8.f4[0] += outVal_f8.f4[1];
+        outVal += (outVal_f8.f1[0] + outVal_f8.f1[1] + outVal_f8.f1[2] + outVal_f8.f1[3]);
+    }
+    // process remaining channels
+    for(; i < channels; i++, srcIdx++)
+        outVal += srcPtr[srcIdx] * nomalizedWeight;
+
+    uint dstIdx = id_z * dstStride + id_x;
+    dstPtr[dstIdx] = outVal;
+}
+
+RppStatus hip_exec_down_mixing_tensor(Rpp32f *srcPtr,
+                                      RpptDescPtr srcDescPtr,
+                                      Rpp32f *dstPtr,
+                                      RpptDescPtr dstDescPtr,
+                                      Rpp32s *srcDimsTensor,
+                                      bool normalizeWeights,
+                                      rpp::Handle& handle)
+{
+    Rpp32s globalThreads_x = dstDescPtr->strides.nStride;
+    Rpp32s globalThreads_y = 1;
+    Rpp32s globalThreads_z = dstDescPtr->n;
+
+    hipLaunchKernelGGL(down_mixing_hip_tensor,
+                       dim3(ceil((Rpp32f)globalThreads_x/LOCAL_THREADS_X_1DIM), ceil((Rpp32f)globalThreads_y/LOCAL_THREADS_Y_1DIM), ceil((Rpp32f)globalThreads_z/LOCAL_THREADS_Z_1DIM)),
+                       dim3(LOCAL_THREADS_X_1DIM, LOCAL_THREADS_Y_1DIM, LOCAL_THREADS_Z_1DIM),
+                       0,
+                       handle.GetStream(),
+                       srcPtr,
+                       srcDescPtr->strides.nStride,
+                       dstPtr,
+                       dstDescPtr->strides.nStride,
+                       reinterpret_cast<int2 *>(srcDimsTensor));
+
+    return RPP_SUCCESS;
+}
diff --git a/src/modules/rppt_tensor_audio_augmentations.cpp b/src/modules/rppt_tensor_audio_augmentations.cpp
@@ -27,6 +27,10 @@ SOFTWARE.
 #include "rppt_tensor_audio_augmentations.h"
 #include "cpu/host_tensor_audio_augmentations.hpp"
 
+#ifdef HIP_COMPILE
+    #include "hip/hip_tensor_audio_augmentations.hpp"
+#endif // HIP_COMPILE
+
 /******************** non_silent_region_detection ********************/
 
 RppStatus rppt_non_silent_region_detection_host(RppPtr_t srcPtr,
@@ -268,3 +272,43 @@ RppStatus rppt_resample_host(RppPtr_t srcPtr,
         return RPP_ERROR_NOT_IMPLEMENTED;
     }
 }
+
+/********************************************************************************************************************/
+/*********************************************** RPP_GPU_SUPPORT = ON ***********************************************/
+/********************************************************************************************************************/
+
+#ifdef GPU_SUPPORT
+
+/******************** down_mixing ********************/
+
+RppStatus rppt_down_mixing_gpu(RppPtr_t srcPtr,
+                               RpptDescPtr srcDescPtr,
+                               RppPtr_t dstPtr,
+                               RpptDescPtr dstDescPtr,
+                               Rpp32s *srcDimsTensor,
+                               bool  normalizeWeights,
+                               rppHandle_t rppHandle)
+{
+#ifdef HIP_COMPILE
+    if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32))
+    {
+        hip_exec_down_mixing_tensor(static_cast<Rpp32f*>(srcPtr),
+                                    srcDescPtr,
+                                    static_cast<Rpp32f*>(dstPtr),
+                                    dstDescPtr,
+                                    srcDimsTensor,
+                                    normalizeWeights,
+                                    rpp::deref(rppHandle));
+    }
+    else
+    {
+        return RPP_ERROR_NOT_IMPLEMENTED;
+    }
+
+    return RPP_SUCCESS;
+#elif defined(OCL_COMPILE)
+    return RPP_ERROR_NOT_IMPLEMENTED;
+#endif // backend
+}
+
+#endif // GPU_SUPPORT
diff --git a/utilities/test_suite/HIP/CMakeLists.txt b/utilities/test_suite/HIP/CMakeLists.txt
@@ -58,6 +58,10 @@ find_package(hip QUIET)
 find_package(OpenCV QUIET)
 find_package(TurboJpeg QUIET)
 find_package(NIFTI QUIET)
+find_library(libsnd_LIBS
+    NAMES sndfile libsndfile
+    PATHS ${CMAKE_SYSTEM_PREFIX_PATH} ${LIBSND_ROOT_DIR} "/usr/local"
+    PATH_SUFFIXES lib lib64)
 
 # OpenMP
 find_package(OpenMP REQUIRED)
@@ -102,4 +106,23 @@ if(NIFTI_FOUND AND OpenCV_FOUND)
     target_link_libraries(Tensor_voxel_hip ${OpenCV_LIBS} -lturbojpeg -lrpp ${hip_LIBRARIES} pthread ${LINK_LIBRARY_LIST} hip::device ${NIFTI_PACKAGE_PREFIX}NIFTI::${NIFTI_PACKAGE_PREFIX}niftiio)
 else()
     message("-- ${Yellow}Warning: libniftiio must be installed to install ${PROJECT_NAME}/Tensor_voxel_hip successfully!${ColourReset}")
-endif()
+endif()
+
+if(NOT libsnd_LIBS)
+    message("-- ${Yellow}Warning: libsndfile must be installed to install ${PROJECT_NAME}/Tensor_audio_host successfully!${ColourReset}")
+else()
+    message("-- ${Green}${PROJECT_NAME} set to build with rpp and libsndfile ${ColourReset}")
+    set(COMPILER_FOR_HIP ${ROCM_PATH}/bin/hipcc)
+    set(CMAKE_CXX_COMPILER ${COMPILER_FOR_HIP})
+    include_directories(${ROCM_PATH}/include ${ROCM_PATH}/include/rpp /usr/local/include)
+    link_directories(${ROCM_PATH}/lib /usr/local/lib)
+    include_directories(${SndFile_INCLUDE_DIRS})
+    link_directories(${SndFile_LIBRARIES_DIR} /usr/local/lib/)
+
+    add_executable(Tensor_audio_hip Tensor_audio_hip.cpp)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=gnu++17")
+    if(NOT APPLE)
+        set(LINK_LIBRARY_LIST ${LINK_LIBRARY_LIST} stdc++fs)
+    endif()
+    target_link_libraries(Tensor_audio_hip ${libsnd_LIBS} -lsndfile -lrpp pthread ${LINK_LIBRARY_LIST})
+endif()