diff --git a/src/modules/cpu/kernel/non_silent_region_detection.hpp b/src/modules/cpu/kernel/non_silent_region_detection.hpp
index 158ccff84..734d267b8 100644
--- a/src/modules/cpu/kernel/non_silent_region_detection.hpp
+++ b/src/modules/cpu/kernel/non_silent_region_detection.hpp
@@ -28,14 +28,6 @@ Rpp32f getSquare(Rpp32f &value)
     return (value * value);
 }
 
-Rpp32f getMax(Rpp32f *values, Rpp32s srcLength)
-{
-    Rpp32f max = values[0];
-    for(int i = 1; i < srcLength; i++)
-        max = std::max(max, values[i]);
-    return max;
-}
-
 RppStatus non_silent_region_detection_host_tensor(Rpp32f *srcPtr,
                                                   RpptDescPtr srcDescPtr,
                                                   Rpp32s *srcLengthTensor,
@@ -56,16 +48,16 @@ RppStatus non_silent_region_detection_host_tensor(Rpp32f *srcPtr,
     {
         Rpp32f *srcPtrTemp = srcPtr + batchCount * srcDescPtr->strides.nStride;
         Rpp32s srcLength = srcLengthTensor[batchCount];
+
+        // mmsBuffer length is equal to input audio length and can vary dynamically for each input in a batch
+        // preallocating a static buffer for entire batchsize will be too big, so allocate mmsBuffer for each sample dynamically
         Rpp32f *mmsBuffer = static_cast<Rpp32f *>(calloc(srcLength, sizeof(Rpp32f)));
         bool referenceMax = (referencePower == 0.0f);
 
         // set reset interval based on the user input
         Rpp32s resetLength = (resetInterval == -1) ? srcLength : resetInterval;
 
-        // Calculate buffer size for mms array and allocate mms buffer
-        Rpp32s mmsBufferSize = srcLength;
-
-        // Calculate moving mean square of input array and store srcPtrTemp mms buffer
+        // calculate moving mean square of input
         Rpp32f meanFactor = 1.0f / windowLength;
         Rpp32s windowBegin = -windowLength + 1;
         for (Rpp32s outPos = 0; outPos < srcLength;)
@@ -84,12 +76,12 @@ RppStatus non_silent_region_detection_host_tensor(Rpp32f *srcPtr,
             }
         }
 
-        // Convert cutOff from DB to magnitude
-        Rpp32f base = (referenceMax) ? getMax(mmsBuffer, mmsBufferSize) : referencePower;
+        // convert cutoff from DB to magnitude
+        Rpp32f base = (referenceMax) ? *std::max_element(mmsBuffer, mmsBuffer + srcLength) : referencePower;
         Rpp32f cutOffMag = base * cutOff;
 
-        // Calculate begining index, length of non silent region from the mms buffer
-        Rpp32s endIdx = mmsBufferSize;
+        // calculate beginning index, length of non silent region from the mms buffer
+        Rpp32s endIdx = srcLength;
         Rpp32s beginIdx = endIdx;
         Rpp32s detectBegin, detectEnd;
         for(int i = 0; i < endIdx; i++)
@@ -119,7 +111,7 @@ RppStatus non_silent_region_detection_host_tensor(Rpp32f *srcPtr,
             detectEnd = endIdx - beginIdx + 1;
         }
 
-        // Extend non silent region
+        // extend non silent region
         if(detectBegin != 0 && detectEnd != 0)
         {
             Rpp32s newBegin = std::max<Rpp32s>(detectBegin - (windowLength - 1), 0);
diff --git a/utilities/test_suite/HOST/Tensor_host_audio.cpp b/utilities/test_suite/HOST/Tensor_host_audio.cpp
index d554425e3..06378c870 100644
--- a/utilities/test_suite/HOST/Tensor_host_audio.cpp
+++ b/utilities/test_suite/HOST/Tensor_host_audio.cpp
@@ -66,12 +66,12 @@ int main(int argc, char **argv)
 
     // Other initializations
     int missingFuncFlag = 0;
-    int i = 0, j = 0, fileCnt = 0;
-    int maxChannels = 0;
+    int i = 0, j = 0;
+    int maxSrcChannels = 0;
     int maxSrcWidth = 0, maxSrcHeight = 0;
     int maxDstWidth = 0, maxDstHeight = 0;
-    unsigned long long iBufferSize = 0;
-    unsigned long long oBufferSize = 0;
+    Rpp64u iBufferSize = 0;
+    Rpp64u oBufferSize = 0;
     static int noOfAudioFiles = 0;
 
     // String ops on function name
@@ -81,12 +81,12 @@ int main(int argc, char **argv)
     string func = funcName;
 
     // Get number of audio files
-    vector<string> audioNames, audioFilePath;
-    search_files_recursive(src, audioNames, audioFilePath, ".wav");
+    vector<string> audioNames, audioFilesPath;
+    search_files_recursive(src, audioNames, audioFilesPath, ".wav");
     noOfAudioFiles = audioNames.size();
     if (noOfAudioFiles < batchSize || ((noOfAudioFiles % batchSize) != 0))
     {
-        replicate_last_file_to_fill_batch(audioFilePath[noOfAudioFiles - 1], audioFilePath, audioNames, audioNames[noOfAudioFiles - 1], noOfAudioFiles, batchSize);
+        replicate_last_file_to_fill_batch(audioFilesPath[noOfAudioFiles - 1], audioFilesPath, audioNames, audioNames[noOfAudioFiles - 1], noOfAudioFiles, batchSize);
         noOfAudioFiles = audioNames.size();
     }
 
@@ -99,64 +99,20 @@ int main(int argc, char **argv)
     // Find max audio dimensions in the input dataset
     maxSrcHeight = 1;
     maxDstHeight = 1;
-    for (int cnt = 0; cnt < noOfAudioFiles ; cnt++)
-    {
-        SNDFILE	*infile;
-        SF_INFO sfinfo;
-        int	readcount;
-
-        // The SF_INFO struct must be initialized before using it
-        memset (&sfinfo, 0, sizeof (sfinfo));
-        if (!(infile = sf_open (audioFilePath[cnt].c_str(), SFM_READ, &sfinfo)))
-        {
-            sf_close (infile);
-            continue;
-        }
-
-        maxSrcWidth = std::max(maxSrcWidth, static_cast<int>(sfinfo.frames));
-        maxChannels = std::max(maxChannels, static_cast<int>(sfinfo.channels));
-
-        // Close input
-        sf_close (infile);
-    }
+    set_audio_max_dimensions(audioFilesPath, maxSrcWidth, maxSrcChannels);
     maxDstWidth = maxSrcWidth;
 
     // Set numDims, offset, n/c/h/w values for src/dst
-    srcDescPtr->numDims = 4;
-    srcDescPtr->offsetInBytes = 0;
-    srcDescPtr->n = batchSize;
-    srcDescPtr->h = maxSrcHeight;
-    srcDescPtr->w = maxSrcWidth;
-    srcDescPtr->c = maxChannels;
-
-    dstDescPtr->numDims = 4;
-    dstDescPtr->offsetInBytes = 0;
-    dstDescPtr->n = batchSize;
-    dstDescPtr->h = maxDstHeight;
-    dstDescPtr->w = maxDstWidth;
-    if (testCase == 3)
-        dstDescPtr->c = 1;
-    else
-        dstDescPtr->c = maxChannels;
-
-    // Optionally set w stride as a multiple of 8 for src/dst
-    srcDescPtr->w = ((srcDescPtr->w / 8) * 8) + 8;
-    dstDescPtr->w = ((dstDescPtr->w / 8) * 8) + 8;
-
-    // Set n/c/h/w strides for src/dst
-    srcDescPtr->strides.nStride = srcDescPtr->c * srcDescPtr->w * srcDescPtr->h;
-    srcDescPtr->strides.hStride = srcDescPtr->c * srcDescPtr->w;
-    srcDescPtr->strides.wStride = srcDescPtr->c;
-    srcDescPtr->strides.cStride = 1;
-
-    dstDescPtr->strides.nStride = dstDescPtr->c * dstDescPtr->w * dstDescPtr->h;
-    dstDescPtr->strides.hStride = dstDescPtr->c * dstDescPtr->w;
-    dstDescPtr->strides.wStride = dstDescPtr->c;
-    dstDescPtr->strides.cStride = 1;
+    Rpp32u offsetInBytes = 0;
+    set_audio_descriptor_dims_and_strides(srcDescPtr, batchSize, maxSrcHeight, maxSrcWidth, maxSrcChannels, offsetInBytes);
+    int maxDstChannels = maxSrcChannels;
+    if(testCase == 3)
+        maxDstChannels = 1;
+    set_audio_descriptor_dims_and_strides(dstDescPtr, batchSize, maxDstHeight, maxDstWidth, maxDstChannels, offsetInBytes);
 
     // Set buffer sizes for src/dst
-    iBufferSize = (unsigned long long)srcDescPtr->h * (unsigned long long)srcDescPtr->w * (unsigned long long)srcDescPtr->c * (unsigned long long)srcDescPtr->n;
-    oBufferSize = (unsigned long long)dstDescPtr->h * (unsigned long long)dstDescPtr->w * (unsigned long long)dstDescPtr->c * (unsigned long long)dstDescPtr->n;
+    iBufferSize = (Rpp64u)srcDescPtr->h * (Rpp64u)srcDescPtr->w * (Rpp64u)srcDescPtr->c * (Rpp64u)srcDescPtr->n;
+    oBufferSize = (Rpp64u)dstDescPtr->h * (Rpp64u)dstDescPtr->w * (Rpp64u)dstDescPtr->c * (Rpp64u)dstDescPtr->n;
 
     // Initialize host buffers for input & output
     Rpp32f *inputf32 = (Rpp32f *)calloc(iBufferSize, sizeof(Rpp32f));
@@ -174,42 +130,10 @@ int main(int argc, char **argv)
     {
         for (int iterCount = 0; iterCount < noOfIterations; iterCount++)
         {
-            for (int cnt = 0; cnt < batchSize; cnt++)
-            {
-                Rpp32f *inputTempF32;
-                inputTempF32 = inputf32 + (cnt * srcDescPtr->strides.nStride);
-
-                SNDFILE	*infile;
-                SF_INFO sfinfo;
-                int	readcount;
-
-                // The SF_INFO struct must be initialized before using it
-                memset (&sfinfo, 0, sizeof (sfinfo));
-                if (!(infile = sf_open (audioFilePath[fileCnt].c_str(), SFM_READ, &sfinfo)))
-                {
-                    sf_close (infile);
-                    continue;
-                }
+            // Read and decode audio and fill the audio dim values
+            if (inputBitDepth == 2)
+                read_audio_batch_and_fill_dims(srcDescPtr, inputf32, audioFilesPath, iterCount, srcLengthTensor, channelsTensor);
 
-                srcLengthTensor[cnt] = sfinfo.frames;
-                channelsTensor[cnt] = sfinfo.channels;
-                srcDims[cnt].width = sfinfo.frames;
-                dstDims[cnt].width = sfinfo.frames;
-                srcDims[cnt].height = 1;
-                dstDims[cnt].height = 1;
-
-                int bufferLength = sfinfo.frames * sfinfo.channels;
-                if (inputBitDepth == 2)
-                {
-                    readcount = (int) sf_read_float (infile, inputTempF32, bufferLength);
-                    if (readcount != bufferLength)
-                        cout << "F32 Unable to read audio file completely " << std::endl;
-                }
-                fileCnt++;
-
-                // Close input
-                sf_close (infile);
-            }
             clock_t startCpuTime, endCpuTime;
             double startWallTime, endWallTime;
             switch (testCase)
@@ -231,6 +155,7 @@ int main(int argc, char **argv)
                     else
                         missingFuncFlag = 1;
 
+                    // QA mode - verify outputs with golden outputs. Below code doesn't run for performance tests
                     if (testType == 0)
                         verify_non_silent_region_detection(detectedIndex, detectionLength, testCaseName, batchSize, audioNames, dst);
 
@@ -243,10 +168,12 @@ int main(int argc, char **argv)
                     Rpp32f multiplier = std::log(10);
                     Rpp32f referenceMagnitude = 1.0f;
 
-                    for (i = 0; i < noOfAudioFiles; i++)
+                    for (int i = 0; i < batchSize; i++)
                     {
                         srcDims[i].height = srcLengthTensor[i];
                         srcDims[i].width = 1;
+                        dstDims[i].height = srcDims[i].height;
+                        dstDims[i].width = 1;
                     }
 
                     startWallTime = omp_get_wtime();
@@ -267,29 +194,31 @@ int main(int argc, char **argv)
 
             endCpuTime = clock();
             endWallTime = omp_get_wtime();
-            cpuTime = ((double)(endCpuTime - startCpuTime)) / CLOCKS_PER_SEC;
-            wallTime = endWallTime - startWallTime;
             if (missingFuncFlag == 1)
             {
                 printf("\nThe functionality %s doesn't yet exist in RPP\n", func.c_str());
                 return -1;
             }
+
+            cpuTime = ((double)(endCpuTime - startCpuTime)) / CLOCKS_PER_SEC;
+            wallTime = endWallTime - startWallTime;
             maxWallTime = std::max(maxWallTime, wallTime);
             minWallTime = std::min(minWallTime, wallTime);
             avgWallTime += wallTime;
-            cpuTime *= 1000;
-            wallTime *= 1000;
 
+            // QA mode - verify outputs with golden outputs. Below code doesn't run for performance tests
             if (testType == 0)
             {
-                if (batchSize == 8 && testCase != 0)
+                /* Run only if testCase is not 0
+                For testCase 0 verify_non_silent_region_detection function is used for QA testing */
+                if (testCase != 0)
                     verify_output(outputf32, dstDescPtr, dstDims, testCaseName, audioNames, dst);
 
-                cout <<"\n\n";
-                cout <<"CPU Backend Clock Time: "<< cpuTime <<" ms/batch"<< endl;
-                cout <<"CPU Backend Wall Time: "<< wallTime <<" ms/batch"<< endl;
-
-                // If DEBUG_MODE is set to 1 dump the outputs to csv files for debugging
+                /* Dump the outputs to csv files for debugging
+                Runs only if
+                1. DEBUG_MODE is enabled
+                2. Current iteration is 1st iteration
+                3. Test case is not 0 */
                 if (DEBUG_MODE && iterCount == 0 && testCase != 0)
                 {
                     std::ofstream refFile;
@@ -300,12 +229,10 @@ int main(int argc, char **argv)
                 }
             }
         }
-
-        // Reset fileIndex to 0 for next run
-        fileCnt = 0;
     }
     rppDestroyHost(handle);
 
+    // performance test mode
     if (testType == 1)
     {
         // Display measured times
diff --git a/utilities/test_suite/rpp_test_suite_audio.h b/utilities/test_suite/rpp_test_suite_audio.h
index 401f14a80..aa03eb46b 100644
--- a/utilities/test_suite/rpp_test_suite_audio.h
+++ b/utilities/test_suite/rpp_test_suite_audio.h
@@ -40,6 +40,86 @@ std::map<int, string> audioAugmentationMap =
     {1, "to_decibels"},
 };
 
+// Fills an audio tensor descriptor: numDims, byte offset, n/c/h dims, a padded w, and the n/h/w/c strides derived from them
+inline void set_audio_descriptor_dims_and_strides(RpptDescPtr descPtr, int batchSize, int maxHeight, int maxWidth, int maxChannels, int offsetInBytes)
+{
+    descPtr->offsetInBytes = offsetInBytes;
+    descPtr->numDims = 4;
+    descPtr->c = maxChannels;
+    descPtr->h = maxHeight;
+    descPtr->n = batchSize;
+
+    // w is always bumped to the next multiple of 8 (a full 8 is added even when maxWidth is already a multiple of 8)
+    descPtr->w = maxWidth;
+    descPtr->w += 8 - (descPtr->w % 8);
+    descPtr->strides.cStride = 1;
+    descPtr->strides.wStride = descPtr->c;
+    descPtr->strides.hStride = descPtr->c * descPtr->w;
+    descPtr->strides.nStride = descPtr->c * descPtr->w * descPtr->h;
+}
+
+// Scans every audio file and raises maxWidth / maxChannels to the largest frame count / channel count found.
+// Files that cannot be opened are skipped; maxWidth and maxChannels are only ever increased, never reset.
+// audioFilesPath is taken by const reference so the path list is not copied on each call.
+inline void set_audio_max_dimensions(const vector<string> &audioFilesPath, int& maxWidth, int& maxChannels)
+{
+    for (const std::string& audioPath : audioFilesPath)
+    {
+        // The SF_INFO struct must be initialized before using it
+        SF_INFO sfinfo;
+        memset (&sfinfo, 0, sizeof (sfinfo));
+
+        // A failed open yields a null handle, so there is nothing to close - move on to the next file
+        SNDFILE *infile = sf_open (audioPath.c_str(), SFM_READ, &sfinfo);
+        if (!infile)
+            continue;
+
+        // Only the values sf_open filled into sfinfo are used here; no sample data is read
+        maxWidth = std::max(maxWidth, static_cast<int>(sfinfo.frames));
+        maxChannels = std::max(maxChannels, static_cast<int>(sfinfo.channels));
+
+        // Close input
+        sf_close (infile);
+    }
+}
+
+// Decodes one batch of audio files as float samples into inputf32 and records each sample's frame and channel count.
+// Batch slot i is read from audioFilesPath[iterCount * descPtr->n + i] and written at inputf32 + i * descPtr->strides.nStride.
+// Files that fail to open are skipped (their srcLengthTensor / channelsTensor entries are left untouched); a short read prints an error and exits with EXIT_FAILURE.
+inline void read_audio_batch_and_fill_dims(RpptDescPtr descPtr, Rpp32f *inputf32, const vector<string> &audioFilesPath, int iterCount, Rpp32s *srcLengthTensor, Rpp32s *channelsTensor)
+{
+    const int batchSize = static_cast<int>(descPtr->n);
+    const int fileIndex = iterCount * batchSize;
+    for (int i = 0; i < batchSize; i++)
+    {
+        const std::string &audioPath = audioFilesPath[fileIndex + i];
+        Rpp32f *inputTempF32 = inputf32 + (i * descPtr->strides.nStride);
+        // The SF_INFO struct must be initialized before using it
+        SF_INFO sfinfo;
+        memset (&sfinfo, 0, sizeof (sfinfo));
+
+        SNDFILE *infile = sf_open (audioPath.c_str(), SFM_READ, &sfinfo);
+        if (!infile)
+            continue;
+
+        srcLengthTensor[i] = sfinfo.frames;
+        channelsTensor[i] = sfinfo.channels;
+
+        // Read and decode data
+        int bufferLength = sfinfo.frames * sfinfo.channels;
+        int readcount = (int) sf_read_float (infile, inputTempF32, bufferLength);
+        if (readcount != bufferLength)
+        {
+            std::cout << "Unable to read audio file: "<<audioPath.c_str() << std::endl;
+            sf_close (infile);
+            exit(EXIT_FAILURE);
+        }
+
+        // Close input
+        sf_close (infile);
+    }
+}
+
 void verify_output(Rpp32f *dstPtr, RpptDescPtr dstDescPtr, RpptImagePatchPtr dstDims, string testCase, vector<string> audioNames, string dst)
 {
     fstream refFile;
@@ -64,6 +144,10 @@ void verify_output(Rpp32f *dstPtr, RpptDescPtr dstDescPtr, RpptImagePatchPtr dst
         Rpp32f refVal, outVal;
         Rpp32f *dstPtrCurrent = dstPtr + batchCount * dstDescPtr->strides.nStride;
         Rpp32f *dstPtrRow = dstPtrCurrent;
+        Rpp32u hStride = dstDescPtr->strides.hStride;
+        if (dstDims[batchCount].width == 1)
+            hStride = 1;
+
         for (int i = 0; i < dstDims[batchCount].height; i++)
         {
             Rpp32f *dstPtrTemp = dstPtrRow;
@@ -75,7 +159,7 @@ void verify_output(Rpp32f *dstPtr, RpptDescPtr dstDescPtr, RpptImagePatchPtr dst
                 if (!invalidComparision && abs(outVal - refVal) < 1e-20)
                     matchedIndices += 1;
             }
-            dstPtrRow += dstDescPtr->strides.hStride;
+            dstPtrRow += hStride;
         }
         refFile.close();
         if (matchedIndices == (dstDims[batchCount].width * dstDims[batchCount].height) && matchedIndices !=0)