diff --git a/src/modules/cpu/kernel/non_silent_region_detection.hpp b/src/modules/cpu/kernel/non_silent_region_detection.hpp index 158ccff84..734d267b8 100644 --- a/src/modules/cpu/kernel/non_silent_region_detection.hpp +++ b/src/modules/cpu/kernel/non_silent_region_detection.hpp @@ -28,14 +28,6 @@ Rpp32f getSquare(Rpp32f &value) return (value * value); } -Rpp32f getMax(Rpp32f *values, Rpp32s srcLength) -{ - Rpp32f max = values[0]; - for(int i = 1; i < srcLength; i++) - max = std::max(max, values[i]); - return max; -} - RppStatus non_silent_region_detection_host_tensor(Rpp32f *srcPtr, RpptDescPtr srcDescPtr, Rpp32s *srcLengthTensor, @@ -56,16 +48,16 @@ RppStatus non_silent_region_detection_host_tensor(Rpp32f *srcPtr, { Rpp32f *srcPtrTemp = srcPtr + batchCount * srcDescPtr->strides.nStride; Rpp32s srcLength = srcLengthTensor[batchCount]; + + // mmsBuffer length is equal to input audio length and can vary dynamically for each input in a batch + // preallocating a static buffer for entire batchsize will be too big, so allocate mmsBuffer for each sample dynamically Rpp32f *mmsBuffer = static_cast(calloc(srcLength, sizeof(Rpp32f))); bool referenceMax = (referencePower == 0.0f); // set reset interval based on the user input Rpp32s resetLength = (resetInterval == -1) ? srcLength : resetInterval; - // Calculate buffer size for mms array and allocate mms buffer - Rpp32s mmsBufferSize = srcLength; - - // Calculate moving mean square of input array and store srcPtrTemp mms buffer + // calculate moving mean square of input Rpp32f meanFactor = 1.0f / windowLength; Rpp32s windowBegin = -windowLength + 1; for (Rpp32s outPos = 0; outPos < srcLength;) @@ -84,12 +76,12 @@ RppStatus non_silent_region_detection_host_tensor(Rpp32f *srcPtr, } } - // Convert cutOff from DB to magnitude - Rpp32f base = (referenceMax) ? getMax(mmsBuffer, mmsBufferSize) : referencePower; + // convert cutoff from DB to magnitude + Rpp32f base = (referenceMax) ? *std::max_element(mmsBuffer, mmsBuffer + srcLength) : referencePower; Rpp32f cutOffMag = base * cutOff; - // Calculate begining index, length of non silent region from the mms buffer - Rpp32s endIdx = mmsBufferSize; + // calculate begining index, length of non silent region from the mms buffer + Rpp32s endIdx = srcLength; Rpp32s beginIdx = endIdx; Rpp32s detectBegin, detectEnd; for(int i = 0; i < endIdx; i++) @@ -119,7 +111,7 @@ RppStatus non_silent_region_detection_host_tensor(Rpp32f *srcPtr, detectEnd = endIdx - beginIdx + 1; } - // Extend non silent region + // extend non silent region if(detectBegin != 0 && detectEnd != 0) { Rpp32s newBegin = std::max(detectBegin - (windowLength - 1), 0); diff --git a/utilities/test_suite/HOST/Tensor_host_audio.cpp b/utilities/test_suite/HOST/Tensor_host_audio.cpp index d554425e3..06378c870 100644 --- a/utilities/test_suite/HOST/Tensor_host_audio.cpp +++ b/utilities/test_suite/HOST/Tensor_host_audio.cpp @@ -66,12 +66,12 @@ int main(int argc, char **argv) // Other initializations int missingFuncFlag = 0; - int i = 0, j = 0, fileCnt = 0; - int maxChannels = 0; + int i = 0, j = 0; + int maxSrcChannels = 0; int maxSrcWidth = 0, maxSrcHeight = 0; int maxDstWidth = 0, maxDstHeight = 0; - unsigned long long iBufferSize = 0; - unsigned long long oBufferSize = 0; + Rpp64u iBufferSize = 0; + Rpp64u oBufferSize = 0; static int noOfAudioFiles = 0; // String ops on function name @@ -81,12 +81,12 @@ int main(int argc, char **argv) string func = funcName; // Get number of audio files - vector audioNames, audioFilePath; - search_files_recursive(src, audioNames, audioFilePath, ".wav"); + vector audioNames, audioFilesPath; + search_files_recursive(src, audioNames, audioFilesPath, ".wav"); noOfAudioFiles = audioNames.size(); if (noOfAudioFiles < batchSize || ((noOfAudioFiles % batchSize) != 0)) { - replicate_last_file_to_fill_batch(audioFilePath[noOfAudioFiles - 1], audioFilePath, audioNames, audioNames[noOfAudioFiles - 1], noOfAudioFiles, batchSize); + replicate_last_file_to_fill_batch(audioFilesPath[noOfAudioFiles - 1], audioFilesPath, audioNames, audioNames[noOfAudioFiles - 1], noOfAudioFiles, batchSize); noOfAudioFiles = audioNames.size(); } @@ -99,64 +99,20 @@ int main(int argc, char **argv) // Find max audio dimensions in the input dataset maxSrcHeight = 1; maxDstHeight = 1; - for (int cnt = 0; cnt < noOfAudioFiles ; cnt++) - { - SNDFILE *infile; - SF_INFO sfinfo; - int readcount; - - // The SF_INFO struct must be initialized before using it - memset (&sfinfo, 0, sizeof (sfinfo)); - if (!(infile = sf_open (audioFilePath[cnt].c_str(), SFM_READ, &sfinfo))) - { - sf_close (infile); - continue; - } - - maxSrcWidth = std::max(maxSrcWidth, static_cast(sfinfo.frames)); - maxChannels = std::max(maxChannels, static_cast(sfinfo.channels)); - - // Close input - sf_close (infile); - } + set_audio_max_dimensions(audioFilesPath, maxSrcWidth, maxSrcChannels); maxDstWidth = maxSrcWidth; // Set numDims, offset, n/c/h/w values for src/dst - srcDescPtr->numDims = 4; - srcDescPtr->offsetInBytes = 0; - srcDescPtr->n = batchSize; - srcDescPtr->h = maxSrcHeight; - srcDescPtr->w = maxSrcWidth; - srcDescPtr->c = maxChannels; - - dstDescPtr->numDims = 4; - dstDescPtr->offsetInBytes = 0; - dstDescPtr->n = batchSize; - dstDescPtr->h = maxDstHeight; - dstDescPtr->w = maxDstWidth; - if (testCase == 3) - dstDescPtr->c = 1; - else - dstDescPtr->c = maxChannels; - - // Optionally set w stride as a multiple of 8 for src/dst - srcDescPtr->w = ((srcDescPtr->w / 8) * 8) + 8; - dstDescPtr->w = ((dstDescPtr->w / 8) * 8) + 8; - - // Set n/c/h/w strides for src/dst - srcDescPtr->strides.nStride = srcDescPtr->c * srcDescPtr->w * srcDescPtr->h; - srcDescPtr->strides.hStride = srcDescPtr->c * srcDescPtr->w; - srcDescPtr->strides.wStride = srcDescPtr->c; - srcDescPtr->strides.cStride = 1; - - dstDescPtr->strides.nStride = dstDescPtr->c * dstDescPtr->w * dstDescPtr->h; - dstDescPtr->strides.hStride = dstDescPtr->c * dstDescPtr->w; - dstDescPtr->strides.wStride = dstDescPtr->c; - dstDescPtr->strides.cStride = 1; + Rpp32u offsetInBytes = 0; + set_audio_descriptor_dims_and_strides(srcDescPtr, batchSize, maxSrcHeight, maxSrcWidth, maxSrcChannels, offsetInBytes); + int maxDstChannels = maxSrcChannels; + if(testCase == 3) + maxDstChannels = 1; + set_audio_descriptor_dims_and_strides(dstDescPtr, batchSize, maxDstHeight, maxDstWidth, maxDstChannels, offsetInBytes); // Set buffer sizes for src/dst - iBufferSize = (unsigned long long)srcDescPtr->h * (unsigned long long)srcDescPtr->w * (unsigned long long)srcDescPtr->c * (unsigned long long)srcDescPtr->n; - oBufferSize = (unsigned long long)dstDescPtr->h * (unsigned long long)dstDescPtr->w * (unsigned long long)dstDescPtr->c * (unsigned long long)dstDescPtr->n; + iBufferSize = (Rpp64u)srcDescPtr->h * (Rpp64u)srcDescPtr->w * (Rpp64u)srcDescPtr->c * (Rpp64u)srcDescPtr->n; + oBufferSize = (Rpp64u)dstDescPtr->h * (Rpp64u)dstDescPtr->w * (Rpp64u)dstDescPtr->c * (Rpp64u)dstDescPtr->n; // Initialize host buffers for input & output Rpp32f *inputf32 = (Rpp32f *)calloc(iBufferSize, sizeof(Rpp32f)); @@ -174,42 +130,10 @@ int main(int argc, char **argv) { for (int iterCount = 0; iterCount < noOfIterations; iterCount++) { - for (int cnt = 0; cnt < batchSize; cnt++) - { - Rpp32f *inputTempF32; - inputTempF32 = inputf32 + (cnt * srcDescPtr->strides.nStride); - - SNDFILE *infile; - SF_INFO sfinfo; - int readcount; - - // The SF_INFO struct must be initialized before using it - memset (&sfinfo, 0, sizeof (sfinfo)); - if (!(infile = sf_open (audioFilePath[fileCnt].c_str(), SFM_READ, &sfinfo))) - { - sf_close (infile); - continue; - } + // Read and decode audio and fill the audio dim values + if (inputBitDepth == 2) + read_audio_batch_and_fill_dims(srcDescPtr, inputf32, audioFilesPath, iterCount, srcLengthTensor, channelsTensor); - srcLengthTensor[cnt] = sfinfo.frames; - channelsTensor[cnt] = sfinfo.channels; - srcDims[cnt].width = sfinfo.frames; - dstDims[cnt].width = sfinfo.frames; - srcDims[cnt].height = 1; - dstDims[cnt].height = 1; - - int bufferLength = sfinfo.frames * sfinfo.channels; - if (inputBitDepth == 2) - { - readcount = (int) sf_read_float (infile, inputTempF32, bufferLength); - if (readcount != bufferLength) - cout << "F32 Unable to read audio file completely " << std::endl; - } - fileCnt++; - - // Close input - sf_close (infile); - } clock_t startCpuTime, endCpuTime; double startWallTime, endWallTime; switch (testCase) @@ -231,6 +155,7 @@ int main(int argc, char **argv) else missingFuncFlag = 1; + // QA mode - verify outputs with golden outputs. Below code doesn’t run for performance tests if (testType == 0) verify_non_silent_region_detection(detectedIndex, detectionLength, testCaseName, batchSize, audioNames, dst); @@ -243,10 +168,12 @@ int main(int argc, char **argv) Rpp32f multiplier = std::log(10); Rpp32f referenceMagnitude = 1.0f; - for (i = 0; i < noOfAudioFiles; i++) + for (int i = 0; i < batchSize; i++) { srcDims[i].height = srcLengthTensor[i]; srcDims[i].width = 1; + dstDims[i].height = srcDims[i].height; + dstDims[i].width = 1; } startWallTime = omp_get_wtime(); @@ -267,29 +194,31 @@ int main(int argc, char **argv) endCpuTime = clock(); endWallTime = omp_get_wtime(); - cpuTime = ((double)(endCpuTime - startCpuTime)) / CLOCKS_PER_SEC; - wallTime = endWallTime - startWallTime; if (missingFuncFlag == 1) { printf("\nThe functionality %s doesn't yet exist in RPP\n", func.c_str()); return -1; } + + cpuTime = ((double)(endCpuTime - startCpuTime)) / CLOCKS_PER_SEC; + wallTime = endWallTime - startWallTime; maxWallTime = std::max(maxWallTime, wallTime); minWallTime = std::min(minWallTime, wallTime); avgWallTime += wallTime; - cpuTime *= 1000; - wallTime *= 1000; + // QA mode - verify outputs with golden outputs. Below code doesn’t run for performance tests if (testType == 0) { - if (batchSize == 8 && testCase != 0) + /* Run only if testCase is not 0 + For testCase 0 verify_non_silent_region_detection function is used for QA testing */ + if (testCase != 0) verify_output(outputf32, dstDescPtr, dstDims, testCaseName, audioNames, dst); - cout <<"\n\n"; - cout <<"CPU Backend Clock Time: "<< cpuTime <<" ms/batch"<< endl; - cout <<"CPU Backend Wall Time: "<< wallTime <<" ms/batch"<< endl; - - // If DEBUG_MODE is set to 1 dump the outputs to csv files for debugging + /* Dump the outputs to csv files for debugging + Runs only if + 1. DEBUG_MODE is enabled + 2. Current iteration is 1st iteration + 3. Test case is not 0 */ if (DEBUG_MODE && iterCount == 0 && testCase != 0) { std::ofstream refFile; @@ -300,12 +229,10 @@ int main(int argc, char **argv) } } } - - // Reset fileIndex to 0 for next run - fileCnt = 0; } rppDestroyHost(handle); + // performance test mode if (testType == 1) { // Display measured times diff --git a/utilities/test_suite/rpp_test_suite_audio.h b/utilities/test_suite/rpp_test_suite_audio.h index 401f14a80..aa03eb46b 100644 --- a/utilities/test_suite/rpp_test_suite_audio.h +++ b/utilities/test_suite/rpp_test_suite_audio.h @@ -40,6 +40,86 @@ std::map audioAugmentationMap = {1, "to_decibels"}, }; +// sets descriptor dimensions and strides of src/dst +inline void set_audio_descriptor_dims_and_strides(RpptDescPtr descPtr, int batchSize, int maxHeight, int maxWidth, int maxChannels, int offsetInBytes) +{ + descPtr->numDims = 4; + descPtr->offsetInBytes = offsetInBytes; + descPtr->n = batchSize; + descPtr->h = maxHeight; + descPtr->w = maxWidth; + descPtr->c = maxChannels; + + // Optionally set w stride as a multiple of 8 for src/dst + descPtr->w = ((descPtr->w / 8) * 8) + 8; + descPtr->strides.nStride = descPtr->c * descPtr->w * descPtr->h; + descPtr->strides.hStride = descPtr->c * descPtr->w; + descPtr->strides.wStride = descPtr->c; + descPtr->strides.cStride = 1; +} + +// sets values of maxHeight and maxWidth +inline void set_audio_max_dimensions(vector audioFilesPath, int& maxWidth, int& maxChannels) +{ + for (const std::string& audioPath : audioFilesPath) + { + SNDFILE *infile; + SF_INFO sfinfo; + int readcount; + + // The SF_INFO struct must be initialized before using it + memset (&sfinfo, 0, sizeof (sfinfo)); + if (!(infile = sf_open (audioPath.c_str(), SFM_READ, &sfinfo))) + { + sf_close (infile); + continue; + } + + maxWidth = std::max(maxWidth, static_cast(sfinfo.frames)); + maxChannels = std::max(maxChannels, static_cast(sfinfo.channels)); + + // Close input + sf_close (infile); + } +} + +void read_audio_batch_and_fill_dims(RpptDescPtr descPtr, Rpp32f *inputf32, vector audioFilesPath, int iterCount, Rpp32s *srcLengthTensor, Rpp32s *channelsTensor) +{ + auto fileIndex = iterCount * descPtr->n; + for (int i = 0, j = fileIndex; i < descPtr->n, j < fileIndex + descPtr->n; i++, j++) + { + Rpp32f *inputTempF32; + inputTempF32 = inputf32 + (i * descPtr->strides.nStride); + + // Read and decode data + SNDFILE *infile; + SF_INFO sfinfo; + int readcount; + + // The SF_INFO struct must be initialized before using it + memset (&sfinfo, 0, sizeof (sfinfo)); + if (!(infile = sf_open (audioFilesPath[j].c_str(), SFM_READ, &sfinfo))) + { + sf_close (infile); + continue; + } + + srcLengthTensor[i] = sfinfo.frames; + channelsTensor[i] = sfinfo.channels; + + int bufferLength = sfinfo.frames * sfinfo.channels; + readcount = (int) sf_read_float (infile, inputTempF32, bufferLength); + if (readcount != bufferLength) + { + std::cout << "Unable to read audio file: "< audioNames, string dst) { fstream refFile; @@ -64,6 +144,10 @@ void verify_output(Rpp32f *dstPtr, RpptDescPtr dstDescPtr, RpptImagePatchPtr dst Rpp32f refVal, outVal; Rpp32f *dstPtrCurrent = dstPtr + batchCount * dstDescPtr->strides.nStride; Rpp32f *dstPtrRow = dstPtrCurrent; + Rpp32u hStride = dstDescPtr->strides.hStride; + if (dstDims[batchCount].width == 1) + hStride = 1; + for (int i = 0; i < dstDims[batchCount].height; i++) { Rpp32f *dstPtrTemp = dstPtrRow; @@ -75,7 +159,7 @@ void verify_output(Rpp32f *dstPtr, RpptDescPtr dstDescPtr, RpptImagePatchPtr dst if (!invalidComparision && abs(outVal - refVal) < 1e-20) matchedIndices += 1; } - dstPtrRow += dstDescPtr->strides.hStride; + dstPtrRow += hStride; } refFile.close(); if (matchedIndices == (dstDims[batchCount].width * dstDims[batchCount].height) && matchedIndices !=0)