Skip to content

Commit

Permalink
RPP Tensor Audio Support - Non Silent Region (r-abishek#224)
Browse files Browse the repository at this point in the history
* Initial commit - Non silent region detection

Includes unittest setup

* Replace vectors with arrays

* Replace Rpp64s with Rpp32s

* Optimize and precompute cutOff

* Fix buffer used

* Fix buffer used

* Update testsuite for Audio

* code cleanup

* Add Readme file for Audio test suite

* changes based on review comments

* minor change

* Remove unittest folders and updated README.md

* Remove unit tests

* minor change

* code cleanup

* added common header file for audio helper functions

* removed unnecessary audio wav files

fixed bug in ROI updation for audio test suite

resolved issue in summary generation for performance tests in python

* removed log file

* added doxygen support for audio

* minor change

* removed the usage of getMax function and used std::max_element

* modularized code in test suite

* minor change

* resolved codacy warnings

* Codacy fix - Remove unused cpuTime

* CMakeLists - Version Update

1.5.0 - TOT Version

* CHANGELOG Updates

Version 1.5.0 placeholder

* resolved issue with file_system dependency in test suite

* Doxygen changes

changed malloc to new in NSR kernel

* added ctests for audio test suite for CI

made changes to add more clarity on the QA Tests results

* Cmake mods for ctest

* HOST-only build error bugfix

* added qa mode parameter to python audio script

added golden output map for QA testing of Non silent region detection

* minor change

* added example for MMS calculation in comments for better understanding

* updated info used for running audio test suite

* removed bitdepth variable from audio test suite

* added more information on computing NSR outputs in the example added

* added variables for min, max case in python test suite

* CTest - Audio Test Updates

Run audio tests only if audio package found

* CI - Update common.groovy

Add audio test support

* Codacy fix

* Update common.groovy - Install Test Deps

---------

Co-authored-by: Snehaa Giridharan <snehaa@multicorewareinc.com>
Co-authored-by: HazarathKumarM <hazarathkumar@multicorewareinc.com>
Co-authored-by: sampath1117 <sampath.rachumallu@multicorewareinc.com>
Co-authored-by: Kiriti Gowda <kiritigowda@gmail.com>
Co-authored-by: Kiriti Gowda <kiriti.nageshgowda@amd.com>
  • Loading branch information
6 people committed Nov 21, 2023
1 parent 16d4892 commit 1f5f3f5
Show file tree
Hide file tree
Showing 22 changed files with 1,260 additions and 45 deletions.
3 changes: 2 additions & 1 deletion .Doxyfile
Original file line number Diff line number Diff line change
Expand Up @@ -966,7 +966,8 @@ INPUT = README.md \
include/rppt_tensor_filter_augmentations.h \
include/rppt_tensor_geometric_augmentations.h \
include/rppt_tensor_morphological_operations.h \
include/rppt_tensor_statistical_operations.h
include/rppt_tensor_statistical_operations.h \
include/rppt_tensor_audio_augmentations.h


# This tag can be used to specify the character encoding of the source files
Expand Down
15 changes: 11 additions & 4 deletions .jenkins/common.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,22 @@ def runCompileCommand(platform, project, jobName, boolean debug=false, boolean s
String buildTypeDir = debug ? 'debug' : 'release'
String backend = 'HIP'
String enableSCL = 'echo build-rpp'
String enableAudioTesting = 'echo audio-tests-not-supported'

if (platform.jenkinsLabel.contains('centos')) {
backend = 'CPU'
if (platform.jenkinsLabel.contains('centos7')) {
enableSCL = 'source scl_source enable llvm-toolset-7'
enableSCL = 'source scl_source enable llvm-toolset-7'
}
else if (platform.jenkinsLabel.contains('ubuntu')) {
enableAudioTesting = 'sudo apt-get install -y libsndfile1-dev'
if (platform.jenkinsLabel.contains('ubuntu20')) {
backend = 'OCL'
}
}
else if (platform.jenkinsLabel.contains('ubuntu20')) {
backend = 'OCL'
else if (platform.jenkinsLabel.contains('rhel')) {
enableAudioTesting = 'sudo yum install -y libsndfile-devel'
}


def command = """#!/usr/bin/env bash
set -x
Expand All @@ -29,6 +35,7 @@ def runCompileCommand(platform, project, jobName, boolean debug=false, boolean s
cd ${project.paths.project_build_prefix}
mkdir -p build/${buildTypeDir} && cd build/${buildTypeDir}
${enableSCL}
${enableAudioTesting}
cmake -DBACKEND=${backend} ${buildTypeArg} ../..
make -j\$(nproc)
sudo make install
Expand Down
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,11 @@ To use RPP, you must have installed the following:
make -j$nproc
sudo make install
```
* Libsndfile installation
```
sudo apt-get update
sudo apt-get install libsndfile1-dev
```

## Build and install RPP

Expand Down
1 change: 1 addition & 0 deletions include/rppt.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ extern "C" {
#include "rppt_tensor_geometric_augmentations.h"
#include "rppt_tensor_morphological_operations.h"
#include "rppt_tensor_statistical_operations.h"
#include "rppt_tensor_audio_augmentations.h"

#ifdef __cplusplus
}
Expand Down
66 changes: 66 additions & 0 deletions include/rppt_tensor_audio_augmentations.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
/*
Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/

#ifndef RPPT_TENSOR_AUDIO_AUGMENTATIONS_H
#define RPPT_TENSOR_AUDIO_AUGMENTATIONS_H

/*!
* \file
* \brief RPPT Tensor Operations - Audio Augmentations.
* \defgroup group_rppt_tensor_audio_augmentations RPPT Tensor Operations - Audio Augmentations.
* \brief RPPT Tensor Operations - Audio Augmentations.
*/

/*! \addtogroup group_rppt_tensor_audio_augmentations
* @{
*/

#include "rpp.h"
#include "rppdefs.h"
#ifdef __cplusplus
extern "C" {
#endif

/*! \brief Non Silent Region Detection augmentation on HOST backend
* \details Non Silent Region Detection augmentation for 1D audio buffer
\n Finds the starting index and length of non silent region in the audio buffer by comparing the
calculated short-term power with cutoff value passed
\n The notes on special parameter values below describe the HOST kernel non_silent_region_detection_host_tensor;
NOTE(review): this assumes the API forwards its arguments unchanged to that kernel - confirm against the implementation.
* \param[in] srcPtr source tensor in HOST memory
* \param[in] srcDescPtr source tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32)
* \param[in] srcLengthTensor source audio buffer length (1D tensor in HOST memory, of size batchSize)
* \param[out] detectedIndexTensor beginning index of non silent region (1D tensor in HOST memory, of size batchSize); the index is stored as an Rpp32f value and is 0 when no non silent region is found
* \param[out] detectionLengthTensor length of non silent region (1D tensor in HOST memory, of size batchSize); the length is stored as an Rpp32f value and is 0 when no non silent region is found
* \param[in] cutOffDB cutOff in dB below which the signal is considered silent (the HOST kernel converts it to a power ratio as 10^(cutOffDB / 10))
* \param[in] windowLength window length used for computing short-term power of the signal
* \param[in] referencePower reference power that is used to convert the signal to dB (when 0, the HOST kernel uses the maximum of the computed short-term power as the reference)
* \param[in] resetInterval number of samples after which the moving mean average is recalculated to avoid precision loss (when -1, the HOST kernel uses the whole audio length as a single interval, i.e. no intermediate recalculation)
* \param[in] rppHandle RPP HOST handle created with <tt>\ref rppCreateWithBatchSize()</tt>
* \return A <tt> \ref RppStatus</tt> enumeration.
* \retval RPP_SUCCESS Successful completion.
* \retval RPP_ERROR* Unsuccessful completion.
*/
RppStatus rppt_non_silent_region_detection_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, Rpp32s *srcLengthTensor, Rpp32f *detectedIndexTensor, Rpp32f *detectionLengthTensor, Rpp32f cutOffDB, Rpp32s windowLength, Rpp32f referencePower, Rpp32s resetInterval, rppHandle_t rppHandle);

#ifdef __cplusplus
}
#endif
#endif // RPPT_TENSOR_AUDIO_AUGMENTATIONS_H
3 changes: 3 additions & 0 deletions src/modules/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ if( "${BACKEND}" STREQUAL "HIP")
# Set HIP compiler and flags
set(CMAKE_CXX_COMPILER ${COMPILER_FOR_HIP})
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${HIP_HIPCC_FLAGS}")
set_source_files_properties(rppt_tensor_audio_augmentations.cpp PROPERTIES COMPILE_FLAGS -mno-fma)

# Add HIP specific preprocessor flags
add_definitions(-DHIP_COMPILE)
Expand All @@ -105,6 +106,7 @@ elseif( "${BACKEND}" STREQUAL "OCL")
file(GLOB MOD_CL_CPP "cl/*.cpp" )
list(APPEND Rpp_Source ${CPPFILES} ${MOD_CL_CPP})
message("-- ${Green}OpenCL kernels added!${ColourReset}")
set_source_files_properties(rppt_tensor_audio_augmentations.cpp PROPERTIES COMPILE_FLAGS -mno-fma)

# Add OpenCL specific preprocessor flags
add_definitions(-DOCL_COMPILE)
Expand All @@ -118,6 +120,7 @@ elseif( "${BACKEND}" STREQUAL "OCL")
elseif( "${BACKEND}" STREQUAL "CPU")
# Add CPU specific includes
set(INCLUDE_LIST ${CMAKE_SOURCE_DIR}/src/include/common/)
set_source_files_properties(rppt_tensor_audio_augmentations.cpp PROPERTIES COMPILE_FLAGS -mno-fma)
endif()
message("-- ${White}AMD RPP ${PROJECT_NAME} -- Include Directories:${INCLUDE_LIST}${ColourReset}")
add_compile_options("-Wno-unused-result")
Expand Down
25 changes: 25 additions & 0 deletions src/modules/cpu/host_tensor_audio_augmentations.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
/*
Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/

#ifndef HOST_TENSOR_AUDIO_AUGMENTATIONS_HPP
#define HOST_TENSOR_AUDIO_AUGMENTATIONS_HPP

#include "kernel/non_silent_region_detection.hpp"

#endif // HOST_TENSOR_AUDIO_AUGMENTATIONS_HPP
192 changes: 192 additions & 0 deletions src/modules/cpu/kernel/non_silent_region_detection.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
/*
Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/


/* Non Silent Region Detection requires Moving Mean Square (MMS) computation on input audio data
MMS buffer is a 1D buffer having same length as input audio. The algorithm used for MMS computation is explained with a sample use case
Example:
Input: [1, 2, 3, 4, 5, 6, 7, 8]
audio_length = 8
window_length = 3
reset_interval_length = 4
window_begin = -window_length + 1 = -2
window_factor = 1 / window_length = 1/3
MMS computation is divided into blocks of reset interval length
num_blocks = ceil(audio_length / reset_interval_length) (the last block is shorter when the division is not exact)
For the above example we will have
- 2 blocks (8 / 4)
- each block runs for 4 iterations
- in each iteration the window begin value is incremented by 1
Block1
window begin = -2
Iteration 0: sum_of_squares = 1*1 // window begin = -2
store sum_of_squares * window_factor in MMS[0]
Iteration 1: sum_of_squares = 1*1 + 2*2 // window begin = -1
store sum_of_squares * window_factor in MMS[1]
Iteration 2: sum_of_squares = 1*1 + 2*2 + 3*3 // window begin = 0
store sum_of_squares * window_factor in MMS[2]
sum_of_squares -= 1*1
Iteration 3: sum_of_squares = 2*2 + 3*3 + 4*4 // window begin = 1
store sum_of_squares * window_factor in MMS[3]
sum_of_squares -= 2*2
Block2
Iteration 0: sum_of_squares = 3*3 + 4*4 + 5*5 // window begin = 2
store sum_of_squares * window_factor in MMS[4]
sum_of_squares -= 3*3
Iteration 1: sum_of_squares = 4*4 + 5*5 + 6*6 // window begin = 3
store sum_of_squares * window_factor in MMS[5]
sum_of_squares -= 4*4
Iteration 2: sum_of_squares = 5*5 + 6*6 + 7*7 // window begin = 4
store sum_of_squares * window_factor in MMS[6]
sum_of_squares -= 5*5
Iteration 3: sum_of_squares = 6*6 + 7*7 + 8*8 // window begin = 5
store sum_of_squares * window_factor in MMS[7]
sum_of_squares -= 6*6
For computing beginning index and length of Non Silent Region in audio data we traverse over
the entire MMS buffer and compare these values with the calculated cutoff value
- For beginning index, traverse the MMS buffer from 0 to audio_length - 1; the first index whose value
is greater than or equal to the cutoff value is the beginning index
- For length, traverse the MMS buffer backwards from audio_length - 1 down to the beginning index; the first index
whose value is greater than or equal to the cutoff value is the ending index of the Non Silent Region. From this
data compute the length with the formula, length = ending index - beginning index + 1
*/

#include "rppdefs.h"
#include <omp.h>
#include <algorithm>
#include <cmath>
#include <vector>

// Returns the square of the given value (value * value).
// Declared inline because this definition lives in a header: a non-inline definition would
// produce multiple-definition link errors as soon as the header is included from a second
// translation unit.
// The argument is taken by value (it is a single float) so that const objects and temporaries
// can be squared as well; every existing call that passes an lvalue keeps working.
inline Rpp32f getSquare(Rpp32f value)
{
    return value * value;
}

/* Non Silent Region Detection - HOST kernel
   For every audio sample in the batch:
     1. computes the Moving Mean Square (MMS) of the input, restarting the running sum every
        resetInterval outputs to limit floating point error accumulation
     2. converts cutOffDB to a power threshold: threshold = reference * 10^(cutOffDB / 10), where
        reference is referencePower, or the maximum MMS value when referencePower is 0
     3. finds the first and last MMS positions that are >= threshold and extends the region backwards
        by (windowLength - 1) samples, clamped at 0

   srcPtr                 input audio, sample b starts at srcPtr + b * srcDescPtr->strides.nStride
   srcDescPtr             source descriptor (n = batch size, strides.nStride = stride between samples)
   srcLengthTensor        number of valid input values of each sample
   detectedIndexTensor    [out] beginning index of the non silent region of each sample (0 when none is found)
   detectionLengthTensor  [out] length of the non silent region of each sample (0 when none is found)
   cutOffDB               threshold in dB relative to the reference power
   windowLength           MMS window length. Expected to be >= 1; it is not validated here.
                          NOTE(review): confirm that the caller rejects windowLength < 1
   referencePower         reference power; 0 selects the maximum MMS value of the sample
   resetInterval          number of outputs after which the running sum is recomputed; -1 (and any other
                          value <= 0, which previously caused an endless loop) means no intermediate reset
   handle                 RPP handle, used for the number of OpenMP threads

   Always returns RPP_SUCCESS. */
RppStatus non_silent_region_detection_host_tensor(Rpp32f *srcPtr,
                                                  RpptDescPtr srcDescPtr,
                                                  Rpp32s *srcLengthTensor,
                                                  Rpp32f *detectedIndexTensor,
                                                  Rpp32f *detectionLengthTensor,
                                                  Rpp32f cutOffDB,
                                                  Rpp32s windowLength,
                                                  Rpp32f referencePower,
                                                  Rpp32s resetInterval,
                                                  rpp::Handle& handle)
{
    Rpp32u numThreads = handle.GetNumThreads();

    // values that are identical for every sample of the batch
    const Rpp32f cutOff = std::pow(10.0f, cutOffDB * 0.1f);
    const bool referenceMax = (referencePower == 0.0f);
    const Rpp32f meanFactor = 1.0f / windowLength;

    omp_set_dynamic(0);
#pragma omp parallel for num_threads(numThreads)
    for(int batchCount = 0; batchCount < srcDescPtr->n; batchCount++)
    {
        Rpp32f *srcPtrTemp = srcPtr + batchCount * srcDescPtr->strides.nStride;
        const Rpp32s srcLength = srcLengthTensor[batchCount];

        // An empty sample has no non silent region. Handling it here also avoids dereferencing
        // max_element of an empty range below, and a negative length can no longer reach the allocation.
        if (srcLength <= 0)
        {
            detectedIndexTensor[batchCount] = 0;
            detectionLengthTensor[batchCount] = 0;
            continue;
        }

        // mmsBuffer length is equal to the input audio length and varies for each input in a batch.
        // Preallocating one buffer for the entire batch would be too big, so it is allocated per sample;
        // std::vector releases it automatically on every path out of the iteration.
        std::vector<Rpp32f> mmsBuffer(srcLength);

        // set reset interval based on the user input; non-positive values disable intermediate resets
        // (a zero or negative interval would otherwise never advance outPos)
        const Rpp32s resetLength = (resetInterval <= 0) ? srcLength : resetInterval;

        // calculate moving mean square of input
        Rpp32s windowBegin = -windowLength + 1;
        for (Rpp32s outPos = 0; outPos < srcLength;)
        {
            // reset the sumOfSquares value to 0 and recompute the starting value required for this block
            Rpp32f sumOfSquares = 0.0f;
            for (Rpp32s i = std::max<Rpp32s>(windowBegin, 0); i < outPos; i++)
                sumOfSquares += getSquare(srcPtrTemp[i]);

            // clamp against the remaining length instead of computing outPos + resetLength,
            // which could overflow for very large reset intervals
            const Rpp32s intervalEndIdx = outPos + std::min<Rpp32s>(resetLength, srcLength - outPos);
            for (; outPos < intervalEndIdx; outPos++, windowBegin++)
            {
                sumOfSquares += getSquare(srcPtrTemp[outPos]);
                mmsBuffer[outPos] = sumOfSquares * meanFactor;
                // drop the oldest value once the window lies fully inside the signal
                if (windowBegin >= 0)
                    sumOfSquares -= getSquare(srcPtrTemp[windowBegin]);
            }
        }

        // convert cutoff from dB to magnitude
        const Rpp32f base = referenceMax ? *std::max_element(mmsBuffer.begin(), mmsBuffer.end()) : referencePower;
        const Rpp32f cutOffMag = base * cutOff;

        // calculate beginning index and length of the non silent region from the mms buffer
        Rpp32s detectBegin = 0;
        Rpp32s detectLength = 0;
        Rpp32s beginIdx = srcLength;
        for (Rpp32s i = 0; i < srcLength; i++)
        {
            if (mmsBuffer[i] >= cutOffMag)
            {
                beginIdx = i;
                break;
            }
        }
        if (beginIdx < srcLength)
        {
            // mmsBuffer[beginIdx] is known to reach the cutoff, so the backward scan can stop just above it
            Rpp32s endIdx = beginIdx;
            for (Rpp32s i = srcLength - 1; i > beginIdx; i--)
            {
                if (mmsBuffer[i] >= cutOffMag)
                {
                    endIdx = i;
                    break;
                }
            }
            detectBegin = beginIdx;
            detectLength = endIdx - beginIdx + 1;
        }

        // if both starting index and length of the non silent region are not 0,
        // move the start back by the part of the window that precedes the first detected position
        if (detectBegin != 0 && detectLength != 0)
        {
            const Rpp32s newBegin = std::max<Rpp32s>(detectBegin - (windowLength - 1), 0);
            detectLength += detectBegin - newBegin;
            detectBegin = newBegin;
        }

        detectedIndexTensor[batchCount] = static_cast<Rpp32f>(detectBegin);
        detectionLengthTensor[batchCount] = static_cast<Rpp32f>(detectLength);
    }
    return RPP_SUCCESS;
}
Loading

0 comments on commit 1f5f3f5

Please sign in to comment.