RPP Tensor Audio Support - Non Silent Region (r-abishek#224)

* Initial commit - Non slient region detection Includes unittest setup * Replace vectors with arrays * Replace Rpp64s with Rpp32s * Optimize and precompute cutOff * Fix buffer used * Fix buffer used * Update testsuite for Audio * code cleanup * Add Readme file for Audio test suite * changes based on review comments * minor change * Remove unittest folders and updated README.md * Remove unit tests * minor change * code cleanup * added common header file for audio helper functions * removed unncessary audio wav files fixed bug in ROI updation for audio test suite resolved issue in summary generation for performance tests in python * removed log file * added doxygen support for audio * minor change * removed the usage of getMax function and used std::max_element * modularized code in test suite * minor change * resolved codacy warnings * Codacy fix - Remove unused cpuTime * CMakeLists - Version Update 1.5.0 - TOT Version * CHANGELOG Updates Version 1.5.0 placeholder * resolved issue with file_system dependency in test suite * Doxygen changes changed malloc to new in NSR kernel * added ctests for audio test suite for CI made changes to add more clarity on the QA Tests results * Cmake mods for ctest * HOST-only build error bugfix * added qa mode paramter to python audio script added golden output map for QA testing of Non silent region detection * minor change * added example for MMS calculation in comments for better understanding * updated info used to for running audio test suite * removed bitdepth variable from audio test suite * added more information on computing NSR outputs in the example added * added variables for min, max case in python test suite * CTest - Audio Test Updates Run audio tests only if audio package found * CI - Update common.groovy Add audio test support * Codacy fix * Update common.groovy - Install Test Deps --------- Co-authored-by: Snehaa Giridharan <snehaa@multicorewareinc.com> Co-authored-by: HazarathKumarM 
<hazarathkumar@multicorewareinc.com> Co-authored-by: sampath1117 <sampath.rachumallu@multicorewareinc.com> Co-authored-by: Kiriti Gowda <kiritigowda@gmail.com> Co-authored-by: Kiriti Gowda <kiriti.nageshgowda@amd.com>
snehaa8 · Nov 21, 2023 · 1f5f3f5 · 1f5f3f5
1 parent 16d4892
commit 1f5f3f5
Show file tree

Hide file tree

Showing 22 changed files with 1,260 additions and 45 deletions.
diff --git a/.Doxyfile b/.Doxyfile
@@ -966,7 +966,8 @@ INPUT                  = README.md \
                         include/rppt_tensor_filter_augmentations.h \
                         include/rppt_tensor_geometric_augmentations.h \
                         include/rppt_tensor_morphological_operations.h \
-                        include/rppt_tensor_statistical_operations.h
+                        include/rppt_tensor_statistical_operations.h \
+                        include/rppt_tensor_audio_augmentations.h
 
 
 # This tag can be used to specify the character encoding of the source files

diff --git a/.jenkins/common.groovy b/.jenkins/common.groovy
@@ -8,16 +8,22 @@ def runCompileCommand(platform, project, jobName, boolean debug=false, boolean s
     String buildTypeDir = debug ? 'debug' : 'release'
     String backend = 'HIP'
     String enableSCL = 'echo build-rpp'
+    String enableAudioTesting = 'echo audio-tests-not-supported'
 
     if (platform.jenkinsLabel.contains('centos')) {
         backend = 'CPU'
-        if (platform.jenkinsLabel.contains('centos7')) {
-            enableSCL = 'source scl_source enable llvm-toolset-7'
+        enableSCL = 'source scl_source enable llvm-toolset-7'
+    }
+    else if (platform.jenkinsLabel.contains('ubuntu')) {
+        enableAudioTesting = 'sudo apt-get install -y libsndfile1-dev'
+        if (platform.jenkinsLabel.contains('ubuntu20')) {
+            backend = 'OCL'
         }
     }
-    else if (platform.jenkinsLabel.contains('ubuntu20')) {
-        backend = 'OCL'
+    else if (platform.jenkinsLabel.contains('rhel')) {
+        enableAudioTesting = 'sudo yum install -y libsndfile-devel'
     }
+
 
     def command = """#!/usr/bin/env bash
                 set -x
@@ -29,6 +35,7 @@ def runCompileCommand(platform, project, jobName, boolean debug=false, boolean s
                 cd ${project.paths.project_build_prefix}
                 mkdir -p build/${buildTypeDir} && cd build/${buildTypeDir}
                 ${enableSCL}
+                ${enableAudioTesting}
                 cmake -DBACKEND=${backend} ${buildTypeArg} ../..
                 make -j\$(nproc)
                 sudo make install

diff --git a/README.md b/README.md
@@ -157,6 +157,11 @@ To use RPP, you must have installed the following:
   make -j$nproc
   sudo make install
   ```
+* Libsndfile installation
+  ```
+  sudo apt-get update
+  sudo apt-get install libsndfile1-dev
+  ```
 
 ## Build and install RPP
 

diff --git a/include/rppt.h b/include/rppt.h
@@ -42,6 +42,7 @@ extern "C" {
 #include "rppt_tensor_geometric_augmentations.h"
 #include "rppt_tensor_morphological_operations.h"
 #include "rppt_tensor_statistical_operations.h"
+#include "rppt_tensor_audio_augmentations.h"
 
 #ifdef __cplusplus
 }

diff --git a/include/rppt_tensor_audio_augmentations.h b/include/rppt_tensor_audio_augmentations.h
@@ -0,0 +1,66 @@
+/*
+Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#ifndef RPPT_TENSOR_AUDIO_AUGMENTATIONS_H
+#define RPPT_TENSOR_AUDIO_AUGMENTATIONS_H
+
+/*!
+ * \file
+ * \brief RPPT Tensor Operations - Audio Augmentations.
+ * \defgroup group_rppt_tensor_audio_augmentations RPPT Tensor Operations - Audio Augmentations.
+ * \brief RPPT Tensor Operations - Audio Augmentations.
+ */
+
+/*! \addtogroup group_rppt_tensor_audio_augmentations
+ * @{
+ */
+
+#include "rpp.h"
+#include "rppdefs.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*! \brief Non Silent Region Detection augmentation on HOST backend
+ * \details Non Silent Region Detection augmentation for 1D audio buffer
+            \n Finds the starting index and length of the non silent region in each audio buffer of the batch by
+            comparing the calculated short-term power (moving mean square over windowLength samples) with the
+            cutoff value passed
+            \n As implemented by the HOST kernel non_silent_region_detection_host_tensor, a sample in which no
+            value reaches the cutoff reports index 0 and length 0, and a non zero starting index is moved back by
+            (windowLength - 1) samples (clamped at 0) with the length extended by the same amount
+ * \param[in] srcPtr source tensor in HOST memory
+ * \param[in] srcDescPtr source tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32)
+ * \param[in] srcLengthTensor source audio buffer length (1D tensor in HOST memory, of size batchSize)
+ * \param[out] detectedIndexTensor beginning index of non silent region, stored as Rpp32f (1D tensor in HOST memory, of size batchSize)
+ * \param[out] detectionLengthTensor length of non silent region, stored as Rpp32f (1D tensor in HOST memory, of size batchSize)
+ * \param[in] cutOffDB cutOff in dB below which the signal is considered silent
+ * \param[in] windowLength window length used for computing short-term power of the signal
+ * \param[in] referencePower reference power that is used to convert the signal to dB (the HOST kernel treats 0 as "use the maximum short-term power of the sample")
+ * \param[in] resetInterval number of samples after which the moving mean average is recalculated to avoid precision loss (the HOST kernel treats -1 as "never recalculate within a sample")
+ * \param[in] rppHandle RPP HOST handle created with <tt>\ref rppCreateWithBatchSize()</tt>
+ * \return A <tt> \ref RppStatus</tt> enumeration.
+ * \retval RPP_SUCCESS Successful completion.
+ * \retval RPP_ERROR* Unsuccessful completion.
+ */
+RppStatus rppt_non_silent_region_detection_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, Rpp32s *srcLengthTensor, Rpp32f *detectedIndexTensor, Rpp32f *detectionLengthTensor, Rpp32f cutOffDB, Rpp32s windowLength, Rpp32f referencePower, Rpp32s resetInterval, rppHandle_t rppHandle);
+
+#ifdef __cplusplus
+}
+#endif
+#endif // RPPT_TENSOR_AUDIO_AUGMENTATIONS_H
diff --git a/src/modules/CMakeLists.txt b/src/modules/CMakeLists.txt
@@ -92,6 +92,7 @@ if( "${BACKEND}" STREQUAL "HIP")
     # Set HIP compiler and flags
     set(CMAKE_CXX_COMPILER ${COMPILER_FOR_HIP})
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${HIP_HIPCC_FLAGS}")
+    set_source_files_properties(rppt_tensor_audio_augmentations.cpp PROPERTIES COMPILE_FLAGS -mno-fma)
 
     # Add HIP specific preprocessor flags
     add_definitions(-DHIP_COMPILE)
@@ -105,6 +106,7 @@ elseif( "${BACKEND}" STREQUAL "OCL")
     file(GLOB MOD_CL_CPP "cl/*.cpp" )
     list(APPEND Rpp_Source ${CPPFILES} ${MOD_CL_CPP})
     message("-- ${Green}OpenCL kernels added!${ColourReset}")
+    set_source_files_properties(rppt_tensor_audio_augmentations.cpp PROPERTIES COMPILE_FLAGS -mno-fma)
 
     # Add OpenCL specific preprocessor flags
     add_definitions(-DOCL_COMPILE)
@@ -118,6 +120,7 @@ elseif( "${BACKEND}" STREQUAL "OCL")
 elseif( "${BACKEND}" STREQUAL "CPU")
     # Add CPU specific includes
     set(INCLUDE_LIST ${CMAKE_SOURCE_DIR}/src/include/common/)
+    set_source_files_properties(rppt_tensor_audio_augmentations.cpp PROPERTIES COMPILE_FLAGS -mno-fma)
 endif()
 message("-- ${White}AMD RPP ${PROJECT_NAME} -- Include Directories:${INCLUDE_LIST}${ColourReset}")
 add_compile_options("-Wno-unused-result")

diff --git a/src/modules/cpu/host_tensor_audio_augmentations.hpp b/src/modules/cpu/host_tensor_audio_augmentations.hpp
@@ -0,0 +1,25 @@
+/*
+Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#ifndef HOST_TENSOR_AUDIO_AUGMENTATIONS_HPP
+#define HOST_TENSOR_AUDIO_AUGMENTATIONS_HPP
+
+#include "kernel/non_silent_region_detection.hpp"
+
+#endif // HOST_TENSOR_AUDIO_AUGMENTATIONS_HPP
diff --git a/src/modules/cpu/kernel/non_silent_region_detection.hpp b/src/modules/cpu/kernel/non_silent_region_detection.hpp
@@ -0,0 +1,192 @@
+/*
+Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+
+/* Non Silent Region Detection requires Moving Mean Square (MMS) computation on input audio data
+MMS buffer is a 1D buffer having same length as input audio. The algorithm used for MMS computation is explained with a sample use case
+
+Example:
+Input: [1, 2, 3, 4, 5, 6, 7, 8]
+audio_length = 8
+window_length = 3
+reset_interval_length = 4
+
+window_begin = -window_length + 1 = -2
+window_factor = 1 / window_length = 1/3
+
+MMS computation is divided into blocks of reset interval length (the last block may be shorter)
+num_blocks = ceil(audio_length / reset_interval_length)
+For the above example we will have
+    - 2 blocks (8 / 4)
+    - each block runs for 4 iterations
+    - in each iteration the window begin value is incremented by 1
+
+Block1
+window begin = -2
+Iteration 0:    sum_of_squares = 1*1                              // window begin = -2
+                store sum_of_squares * window_factor in MMS[0]
+
+Iteration 1:    sum_of_squares = 1*1 + 2*2                        // window begin = -1
+                store sum_of_squares * window_factor in MMS[1]
+
+Iteration 2:    sum_of_squares = 1*1 + 2*2 + 3*3                  // window begin =  0
+                store sum_of_squares * window_factor in MMS[2]
+                sum_of_squares -= 1*1
+
+Iteration 3:    sum_of_squares = 2*2 + 3*3 + 4*4                  // window begin =  1
+                store sum_of_squares * window_factor in MMS[3]
+                sum_of_squares -= 2*2
+
+Block2
+Iteration 0:    sum_of_squares = 3*3 + 4*4 + 5*5                  // window begin = 2
+                store sum_of_squares * window_factor in MMS[4]
+                sum_of_squares -= 3*3
+
+Iteration 1:    sum_of_squares = 4*4 + 5*5 + 6*6                 // window begin = 3
+                store sum_of_squares * window_factor in MMS[5]
+                sum_of_squares -= 4*4
+
+Iteration 2:    sum_of_squares = 5*5 + 6*6 + 7*7                 // window begin = 4
+                store sum_of_squares * window_factor in MMS[6]
+                sum_of_squares -= 5*5
+
+Iteration 3:    sum_of_squares  = 6*6 + 7*7 + 8*8                // window begin = 5
+                store sum_of_squares * window_factor in MMS[7]
+                sum_of_squares -= 6*6
+
+To compute the beginning index and length of the Non Silent Region in the audio data, we traverse
+the MMS buffer and compare its values with the calculated cutoff value
+    - For the beginning index, traverse the MMS buffer from 0 to audio_length - 1. The first position whose value
+      is greater than or equal to the cutoff value is the beginning index
+    - For the length, traverse the MMS buffer from audio_length - 1 down to the beginning index. The first position
+      whose value is greater than or equal to the cutoff value is the ending index of the Non Silent Region.
+      The length is then computed with the formula, length = ending index - beginning index + 1
+    - If no value reaches the cutoff, both the beginning index and the length are reported as 0
+    - If the beginning index is non zero, it is moved back by window_length - 1 (clamped at 0) and the length
+      is extended by the same amount, since MMS[i] covers the window_length samples ending at i
+*/
+
+#include "rppdefs.h"
+#include <omp.h>
+#include <algorithm>
+
+// Returns value * value.
+// Marked inline because this definition lives in a header at namespace scope; without it, including the
+// header from more than one translation unit produces duplicate symbol definitions at link time.
+// The argument is taken by value so that const values and temporaries are accepted as well.
+inline Rpp32f getSquare(Rpp32f value)
+{
+    return (value * value);
+}
+
+/*! \brief Non Silent Region Detection HOST kernel for a batch of 1D F32 audio buffers
+ * \details For every sample in the batch, computes the Moving Mean Square (MMS) of the input over windowLength
+ *          samples, converts cutOffDB into a magnitude threshold and reports the first index and the length of the
+ *          span whose MMS values reach that threshold. One sample is processed per OpenMP loop iteration.
+ * \param[in] srcPtr base pointer of the batch; sample b starts at srcPtr + b * srcDescPtr->strides.nStride
+ * \param[in] srcDescPtr source descriptor; n is used as the batch size and strides.nStride as the per sample stride
+ * \param[in] srcLengthTensor number of valid audio values of each sample (one entry per sample)
+ * \param[out] detectedIndexTensor beginning index of the non silent region of each sample (0 if none was found)
+ * \param[out] detectionLengthTensor length of the non silent region of each sample (0 if none was found)
+ * \param[in] cutOffDB cutoff in dB; converted to a linear factor with 10^(cutOffDB / 10)
+ * \param[in] windowLength number of samples in the MMS window; must be greater than 0
+ * \param[in] referencePower power that the cutoff factor is multiplied with; 0 selects the maximum MMS value of the sample
+ * \param[in] resetInterval number of outputs after which the running sum of squares is recomputed from the input;
+ *            -1 (or any other non positive value) disables the periodic recomputation
+ * \param[in] handle RPP handle, queried for the number of OpenMP threads to use
+ * \return RPP_SUCCESS on completion, RPP_ERROR_INVALID_ARGUMENTS if windowLength is not positive
+ */
+RppStatus non_silent_region_detection_host_tensor(Rpp32f *srcPtr,
+                                                  RpptDescPtr srcDescPtr,
+                                                  Rpp32s *srcLengthTensor,
+                                                  Rpp32f *detectedIndexTensor,
+                                                  Rpp32f *detectionLengthTensor,
+                                                  Rpp32f cutOffDB,
+                                                  Rpp32s windowLength,
+                                                  Rpp32f referencePower,
+                                                  Rpp32s resetInterval,
+                                                  rpp::Handle& handle)
+{
+    // a non positive window would divide by zero below and make windowBegin run ahead of outPos,
+    // which reads past the position being written (and past the end of the sample)
+    if (windowLength <= 0)
+        return RPP_ERROR_INVALID_ARGUMENTS;
+
+    Rpp32u numThreads = handle.GetNumThreads();
+
+    // values that do not depend on the sample are computed once for the whole batch
+    const Rpp32f cutOff = std::pow(10.0f, cutOffDB * 0.1f);
+    const bool referenceMax = (referencePower == 0.0f);
+    const Rpp32f meanFactor = 1.0f / windowLength;
+
+    omp_set_dynamic(0);
+#pragma omp parallel for num_threads(numThreads)
+    for(int batchCount = 0; batchCount < srcDescPtr->n; batchCount++)
+    {
+        Rpp32f *srcPtrTemp = srcPtr + batchCount * srcDescPtr->strides.nStride;
+        Rpp32s srcLength = srcLengthTensor[batchCount];
+
+        // an empty (or invalid, negative length) sample has no non silent region
+        // report 0, 0 before allocating, since std::max_element on an empty range must not be dereferenced
+        // and a negative array size would throw inside the parallel region
+        if (srcLength <= 0)
+        {
+            detectedIndexTensor[batchCount] = 0.0f;
+            detectionLengthTensor[batchCount] = 0.0f;
+            continue;
+        }
+
+        // mmsBuffer length is equal to input audio length and can vary dynamically for each input in a batch
+        // preallocating a static buffer for entire batchsize will be too big, so allocate mmsBuffer for each sample dynamically
+        Rpp32f *mmsBuffer = new Rpp32f[srcLength]{};
+
+        // set reset interval based on the user input
+        // any non positive interval means "no periodic reset"; a block length <= 0 would never advance outPos
+        Rpp32s resetLength = (resetInterval <= 0) ? srcLength : resetInterval;
+
+        // calculate moving mean square of input
+        Rpp32s windowBegin = -windowLength + 1;
+        for (Rpp32s outPos = 0; outPos < srcLength;)
+        {
+            // reset the sumOfSquares values to 0 and recompute the starting value required for next block
+            Rpp32f sumOfSquares = 0.0f;
+            for (Rpp32s i = std::max<Rpp32s>(windowBegin, 0); i < outPos; i++)
+                sumOfSquares += getSquare(srcPtrTemp[i]);
+
+            // clamp the block length to the remaining samples first, so that the end index cannot overflow
+            Rpp32s intervalEndIdx = outPos + std::min<Rpp32s>(resetLength, srcLength - outPos);
+            for (; outPos < intervalEndIdx; outPos++, windowBegin++)
+            {
+                sumOfSquares += getSquare(srcPtrTemp[outPos]);
+                mmsBuffer[outPos] = sumOfSquares * meanFactor;
+                // drop the oldest value once the window lies completely inside the input
+                if (windowBegin >= 0)
+                    sumOfSquares -= getSquare(srcPtrTemp[windowBegin]);
+            }
+        }
+
+        // convert cutoff from DB to magnitude
+        Rpp32f base = (referenceMax) ? *std::max_element(mmsBuffer, mmsBuffer + srcLength) : referencePower;
+        Rpp32f cutOffMag = base * cutOff;
+
+        // calculate beginning index, length of non silent region from the mms buffer
+        Rpp32s detectBegin = 0;
+        Rpp32s detectLength = 0;
+
+        // first position whose short-term power reaches the cutoff (srcLength if there is none)
+        Rpp32s beginIdx = 0;
+        while (beginIdx < srcLength && !(mmsBuffer[beginIdx] >= cutOffMag))
+            beginIdx++;
+
+        if (beginIdx < srcLength)
+        {
+            // last position whose short-term power reaches the cutoff
+            // the search stops at beginIdx at the latest, because that position is known to satisfy the test
+            Rpp32s lastIdx = srcLength - 1;
+            while (!(mmsBuffer[lastIdx] >= cutOffMag))
+                lastIdx--;
+
+            detectBegin = beginIdx;
+            detectLength = lastIdx - beginIdx + 1;
+        }
+
+        // if both starting index and length of nonsilent region is not 0
+        // adjust the values as per the windowLength
+        // (mmsBuffer[i] covers the windowLength input values ending at i, so the region starts earlier)
+        if (detectBegin != 0 && detectLength != 0)
+        {
+            Rpp32s newBegin = std::max<Rpp32s>(detectBegin - (windowLength - 1), 0);
+            detectLength += detectBegin - newBegin;
+            detectBegin = newBegin;
+        }
+
+        detectedIndexTensor[batchCount] = static_cast<Rpp32f>(detectBegin);
+        detectionLengthTensor[batchCount] = static_cast<Rpp32f>(detectLength);
+        delete[] mmsBuffer;
+    }
+    return RPP_SUCCESS;
+}