diff --git a/include/rppt_tensor_audio_augmentations.h b/include/rppt_tensor_audio_augmentations.h
index 138b3baa8..31bb34eff 100644
--- a/include/rppt_tensor_audio_augmentations.h
+++ b/include/rppt_tensor_audio_augmentations.h
@@ -95,7 +95,22 @@ RppStatus rppt_to_decibels_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_
  */
 RppStatus rppt_pre_emphasis_filter_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, Rpp32s *srcLengthTensor, Rpp32f *coeffTensor, RpptAudioBorderType borderType, rppHandle_t rppHandle);
 
+/*! \brief Down Mixing augmentation on HOST backend
+* \details Down Mixing augmentation for audio data - mixes the channels of each audio sample down to a single channel, weighting every channel by 1 / (number of channels)
+* \param[in] srcPtr source tensor in HOST memory
+* \param[in] srcDescPtr source tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32)
+* \param[out] dstPtr destination tensor in HOST memory
+* \param[in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 3, offsetInBytes >= 0, dataType = F32)
+* \param[in] srcDimsTensor source audio buffer length and number of channels, stored per batch sample as consecutive (length, channels) values (1D tensor in HOST memory, of size batchSize * 2)
+* \param[in] normalizeWeights bool flag to specify if normalization of weights is needed
+* \param[in] rppHandle RPP HOST handle created with <tt>\ref rppCreateWithBatchSize()</tt>
+* \return A <tt> \ref RppStatus</tt> enumeration.
+* \retval RPP_SUCCESS Successful completion.
+* \retval RPP_ERROR* Unsuccessful completion.
+*/
+RppStatus rppt_down_mixing_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, Rpp32s *srcDimsTensor, bool normalizeWeights, rppHandle_t rppHandle);
+
 #ifdef __cplusplus
 }
 #endif
-#endif // RPPT_TENSOR_AUDIO_AUGMENTATIONS_H
\ No newline at end of file
+#endif // RPPT_TENSOR_AUDIO_AUGMENTATIONS_H
diff --git a/src/include/cpu/rpp_cpu_simd.hpp b/src/include/cpu/rpp_cpu_simd.hpp
index 84c898b90..ff30de027 100644
--- a/src/include/cpu/rpp_cpu_simd.hpp
+++ b/src/include/cpu/rpp_cpu_simd.hpp
@@ -2438,6 +2438,29 @@ static inline __m128 log_ps(__m128 x)
     return x;
 }
 
+// Horizontal sum of the four packed floats in x, evaluated as (x0 + x1) + (x2 + x3).
+inline Rpp32f rpp_hsum_ps(__m128 x)
+{
+    __m128 oddLanes = _mm_movehdup_ps(x);                     // x3, x3, x1, x1
+    __m128 pairSums = _mm_add_ps(x, oddLanes);                // lane 0 = x0 + x1, lane 2 = x2 + x3
+    __m128 upperPair = _mm_movehl_ps(oddLanes, pairSums);     // brings x2 + x3 down into lane 0
+    return _mm_cvtss_f32(_mm_add_ss(pairSums, upperPair));    // lane 0 = (x0 + x1) + (x2 + x3)
+}
+
+// Horizontal sum of the eight packed floats in x.
+// Folds the two 128-bit halves together, then reduces 4 -> 2 -> 1 lanes and returns lane 0.
+inline Rpp32f rpp_hsum_ps(__m256 x)
+{
+    __m128 upperHalf = _mm256_extractf128_ps(x, 1);       // x7, x6, x5, x4
+    __m128 lowerHalf = _mm256_castps256_ps128(x);         // x3, x2, x1, x0
+    __m128 quad = _mm_add_ps(upperHalf, lowerHalf);       // x3 + x7, x2 + x6, x1 + x5, x0 + x4
+    __m128 quadUpper = _mm_movehl_ps(quad, quad);         // -, -, x3 + x7, x2 + x6
+    __m128 pair = _mm_add_ps(quad, quadUpper);            // -, -, x1 + x3 + x5 + x7, x0 + x2 + x4 + x6
+    __m128 pairOdd = _mm_shuffle_ps(pair, pair, 0x1);     // -, -, -, x1 + x3 + x5 + x7
+    __m128 total = _mm_add_ss(pair, pairOdd);             // -, -, -, x0 + x1 + x2 + x3 + x4 + x5 + x6 + x7
+    return _mm_cvtss_f32(total);
+}
+
 static inline void fast_matmul4x4_sse(float *A, float *B, float *C)
 {
     __m128 row1 = _mm_load_ps(&B[0]);                   // Row 0 of B
diff --git a/src/modules/cpu/host_tensor_audio_augmentations.hpp b/src/modules/cpu/host_tensor_audio_augmentations.hpp
index 7737b38c3..e2edb1afc 100644
--- a/src/modules/cpu/host_tensor_audio_augmentations.hpp
+++ b/src/modules/cpu/host_tensor_audio_augmentations.hpp
@@ -28,5 +28,6 @@ SOFTWARE.
 #include "kernel/non_silent_region_detection.hpp"
 #include "kernel/to_decibels.hpp"
 #include "kernel/pre_emphasis_filter.hpp"
+#include "kernel/down_mixing.hpp"
 
 #endif // HOST_TENSOR_AUDIO_AUGMENTATIONS_HPP
\ No newline at end of file
diff --git a/src/modules/cpu/kernel/down_mixing.hpp b/src/modules/cpu/kernel/down_mixing.hpp
new file mode 100644
index 000000000..9cefc64a2
--- /dev/null
+++ b/src/modules/cpu/kernel/down_mixing.hpp
@@ -0,0 +1,122 @@
+/*
+MIT License
+
+Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+*/
+
+#include "rppdefs.h"
+#include <omp.h>
+
+// Downmixes each multi-channel audio sample in the batch to mono on HOST.
+// srcDimsTensor holds one (samples, channels) pair per batch item; the source is consumed as
+// channel-interleaved frames and every output sample is the weighted sum of one frame, with each
+// channel weighted by 1 / channels. When normalizeWeights is set the weight is additionally
+// scaled by the reciprocal of the summed weights. Always returns RPP_SUCCESS.
+RppStatus down_mixing_host_tensor(Rpp32f *srcPtr,
+                                  RpptDescPtr srcDescPtr,
+                                  Rpp32f *dstPtr,
+                                  RpptDescPtr dstDescPtr,
+                                  Rpp32s *srcDimsTensor,
+                                  bool normalizeWeights,
+                                  rpp::Handle& handle)
+{
+    Rpp32u numThreads = handle.GetNumThreads();
+
+    omp_set_dynamic(0);
+#pragma omp parallel for num_threads(numThreads)
+    for(int batchCount = 0; batchCount < srcDescPtr->n; batchCount++)
+    {
+        Rpp32f *srcPtrTemp = srcPtr + batchCount * srcDescPtr->strides.nStride;
+        Rpp32f *dstPtrTemp = dstPtr + batchCount * dstDescPtr->strides.nStride;
+
+        Rpp32s samples = srcDimsTensor[batchCount * 2];
+        Rpp32s channels = srcDimsTensor[batchCount * 2 + 1];
+
+        if(channels == 1)
+        {
+            // No need of downmixing, do a direct memcpy
+            memcpy(dstPtrTemp, srcPtrTemp, (size_t)(samples * sizeof(Rpp32f)));
+        }
+        else
+        {
+            // Every channel shares the same weight, so a thread-local scalar is sufficient.
+            // A per-item slice of shared scratch memory at offset batchCount * channels would
+            // overlap between items with different channel counts and race across threads.
+            Rpp32f weight = 1.f / channels;
+
+            if(normalizeWeights)
+            {
+                // Sum one weight per channel sequentially, as if summing a per-channel array
+                Rpp32f sum = 0.0;
+                for(int i = 0; i < channels; i++)
+                    sum += weight;
+
+                // Normalize the weight
+                Rpp32f invSum = 1.0 / sum;
+                weight *= invSum;
+            }
+
+            // if number of channels are greater than or equal to 8, use AVX implementation
+            bool flagAVX = (channels > 7);
+            Rpp32s channelIncrement = flagAVX ? 8 : 4;
+            Rpp32s alignedChannels = (channels / channelIncrement) * channelIncrement;
+
+            // Loop-invariant broadcast weights, built once per batch item
+            __m256 pWeightsAvx = _mm256_set1_ps(weight);
+            __m128 pWeightsSse = _mm_set1_ps(weight);
+
+            // use weights to downmix to mono
+            for(int64_t dstIdx = 0; dstIdx < samples; dstIdx++)
+            {
+                Rpp32s channelLoopCount = 0;
+                if(flagAVX)
+                {
+                    __m256 pDst = avx_p0;
+                    for(; channelLoopCount < alignedChannels; channelLoopCount += channelIncrement)
+                    {
+                        __m256 pSrc = _mm256_loadu_ps(srcPtrTemp);
+                        pSrc = _mm256_mul_ps(pSrc, pWeightsAvx);
+                        pDst = _mm256_add_ps(pDst, pSrc);
+                        srcPtrTemp += channelIncrement;
+                    }
+                    dstPtrTemp[dstIdx] = rpp_hsum_ps(pDst);
+                }
+                else
+                {
+                    __m128 pDst = xmm_p0;
+                    for(; channelLoopCount < alignedChannels; channelLoopCount += channelIncrement)
+                    {
+                        __m128 pSrc = _mm_loadu_ps(srcPtrTemp);
+                        pSrc = _mm_mul_ps(pSrc, pWeightsSse);
+                        pDst = _mm_add_ps(pDst, pSrc);
+                        srcPtrTemp += channelIncrement;
+                    }
+                    dstPtrTemp[dstIdx] = rpp_hsum_ps(pDst);
+                }
+                // Scalar tail for the channels left over after the vector loop
+                for(; channelLoopCount < channels; channelLoopCount++)
+                    dstPtrTemp[dstIdx] += ((*srcPtrTemp++) * weight);
+            }
+        }
+    }
+
+    return RPP_SUCCESS;
+}
diff --git a/src/modules/rppt_tensor_audio_augmentations.cpp b/src/modules/rppt_tensor_audio_augmentations.cpp
index 23b52bc44..d78b8890a 100644
--- a/src/modules/rppt_tensor_audio_augmentations.cpp
+++ b/src/modules/rppt_tensor_audio_augmentations.cpp
@@ -126,3 +126,31 @@ RppStatus rppt_pre_emphasis_filter_host(RppPtr_t srcPtr,
         return RPP_ERROR_NOT_IMPLEMENTED;
     }
 }
+
+/******************** down_mixing ********************/
+
+// HOST entry point for the down mixing augmentation.
+// Dispatches to the F32 kernel; any other source/destination data type is reported as unsupported.
+RppStatus rppt_down_mixing_host(RppPtr_t srcPtr,
+                                RpptDescPtr srcDescPtr,
+                                RppPtr_t dstPtr,
+                                RpptDescPtr dstDescPtr,
+                                Rpp32s *srcDimsTensor,
+                                bool normalizeWeights,
+                                rppHandle_t rppHandle)
+{
+    // Only the F32 -> F32 combination has an implementation
+    if ((srcDescPtr->dataType != RpptDataType::F32) || (dstDescPtr->dataType != RpptDataType::F32))
+        return RPP_ERROR_NOT_IMPLEMENTED;
+
+    // The status returned by the kernel is not inspected here
+    down_mixing_host_tensor(static_cast<Rpp32f*>(srcPtr),
+                            srcDescPtr,
+                            static_cast<Rpp32f*>(dstPtr),
+                            dstDescPtr,
+                            srcDimsTensor,
+                            normalizeWeights,
+                            rpp::deref(rppHandle));
+
+    return RPP_SUCCESS;
+}
diff --git a/utilities/test_suite/HOST/CMakeLists.txt b/utilities/test_suite/HOST/CMakeLists.txt
index 9cb32ae78..b7abf5d77 100644
--- a/utilities/test_suite/HOST/CMakeLists.txt
+++ b/utilities/test_suite/HOST/CMakeLists.txt
@@ -109,7 +109,7 @@ else()
 endif()
 
 if(NOT libsnd_LIBS)
-    message("-- ${Yellow}Warning: libsndfile must be installed to install ${PROJECT_NAME}/Tensor_voxel_host successfully!${ColourReset}")
+    message("-- ${Yellow}Warning: libsndfile must be installed to install ${PROJECT_NAME}/Tensor_audio_host successfully!${ColourReset}")
 else()
     message("-- ${Green}${PROJECT_NAME} set to build with rpp and libsndfile ${ColourReset}")
     include_directories(${ROCM_PATH}/include ${ROCM_PATH}/include/rpp /usr/local/include)
diff --git a/utilities/test_suite/HOST/Tensor_host_audio.cpp b/utilities/test_suite/HOST/Tensor_host_audio.cpp
index 139e7e97e..fe6fa1246 100644
--- a/utilities/test_suite/HOST/Tensor_host_audio.cpp
+++ b/utilities/test_suite/HOST/Tensor_host_audio.cpp
@@ -197,6 +197,25 @@ int main(int argc, char **argv)
 
                     break;
                 }
+                case 3:
+                {
+                    testCaseName = "down_mixing";
+                    bool normalizeWeights = false;
+                    Rpp32s srcDimsTensor[batchSize * 2];
+
+                    for (int i = 0, j = 0; i < batchSize; i++, j += 2)
+                    {
+                        srcDimsTensor[j] = srcLengthTensor[i];
+                        srcDimsTensor[j + 1] = channelsTensor[i];
+                        dstDims[i].height = srcLengthTensor[i];
+                        dstDims[i].width = 1;
+                    }
+
+                    startWallTime = omp_get_wtime();
+                    rppt_down_mixing_host(inputf32, srcDescPtr, outputf32, dstDescPtr, srcDimsTensor, normalizeWeights, handle);
+
+                    break;
+                }
                 default:
                 {
                     missingFuncFlag = 1;
@@ -263,4 +282,4 @@ int main(int argc, char **argv)
     free(inputf32);
     free(outputf32);
     return 0;
-}
+}
\ No newline at end of file
diff --git a/utilities/test_suite/HOST/runAudioTests.py b/utilities/test_suite/HOST/runAudioTests.py
index c05a7a011..70ec00026 100644
--- a/utilities/test_suite/HOST/runAudioTests.py
+++ b/utilities/test_suite/HOST/runAudioTests.py
@@ -37,7 +37,7 @@
 outFolderPath = os.getcwd()
 buildFolderPath = os.getcwd()
 caseMin = 0
-caseMax = 2
+caseMax = 3
 
 # Checks if the folder path is empty, or is it a root folder, or if it exists, and remove its contents
 def validate_and_remove_files(path):
@@ -235,13 +235,31 @@ def rpp_test_suite_parser_and_validator():
         exit(0)
 
     for case in caseList:
+        if "--input_path" not in sys.argv:
+            if case == "3":
+                srcPath = scriptPath + "/../TEST_AUDIO_FILES/three_sample_multi_channel_src1"
+            else:
+                srcPath = inFilePath
+        if int(case) < 0 or int(case) > 3:
+            print(f"Invalid case number {case}. Case number must be 0-3 range!")
+            continue
+
         run_unit_test(srcPath, case, numRuns, testType, batchSize, outFilePath)
 else:
     for case in caseList:
+        if "--input_path" not in sys.argv:
+            if case == "3":
+                srcPath = scriptPath + "/../TEST_AUDIO_FILES/three_sample_multi_channel_src1"
+            else:
+                srcPath = inFilePath
+        if int(case) < 0 or int(case) > 3:
+            print(f"Invalid case number {case}. Case number must be 0-3 range!")
+            continue
+
         run_performance_test(loggingFolder, srcPath, case, numRuns, testType, batchSize, outFilePath)
 
 # print the results of qa tests
-supportedCaseList = ['0', '1', '2']
+supportedCaseList = ['0', '1', '2', '3']
 nonQACaseList = [] # Add cases present in supportedCaseList, but without QA support
 
 if testType == 0:
diff --git a/utilities/test_suite/REFERENCE_OUTPUTS_AUDIO/down_mixing/down_mixing.bin b/utilities/test_suite/REFERENCE_OUTPUTS_AUDIO/down_mixing/down_mixing.bin
new file mode 100644
index 000000000..cb7c8bb84
Binary files /dev/null and b/utilities/test_suite/REFERENCE_OUTPUTS_AUDIO/down_mixing/down_mixing.bin differ
diff --git a/utilities/test_suite/TEST_AUDIO_FILES/single_sample_multi_channel_src1/sample.wav b/utilities/test_suite/TEST_AUDIO_FILES/three_sample_multi_channel_src1/sample1.wav
similarity index 100%
rename from utilities/test_suite/TEST_AUDIO_FILES/single_sample_multi_channel_src1/sample.wav
rename to utilities/test_suite/TEST_AUDIO_FILES/three_sample_multi_channel_src1/sample1.wav
diff --git a/utilities/test_suite/TEST_AUDIO_FILES/three_sample_multi_channel_src1/sample2.wav b/utilities/test_suite/TEST_AUDIO_FILES/three_sample_multi_channel_src1/sample2.wav
new file mode 100644
index 000000000..4847f78cd
Binary files /dev/null and b/utilities/test_suite/TEST_AUDIO_FILES/three_sample_multi_channel_src1/sample2.wav differ
diff --git a/utilities/test_suite/TEST_AUDIO_FILES/three_sample_multi_channel_src1/sample3.wav b/utilities/test_suite/TEST_AUDIO_FILES/three_sample_multi_channel_src1/sample3.wav
new file mode 100644
index 000000000..a506e1762
Binary files /dev/null and b/utilities/test_suite/TEST_AUDIO_FILES/three_sample_multi_channel_src1/sample3.wav differ
diff --git a/utilities/test_suite/rpp_test_suite_audio.h b/utilities/test_suite/rpp_test_suite_audio.h
index 2ac174042..ec962270a 100644
--- a/utilities/test_suite/rpp_test_suite_audio.h
+++ b/utilities/test_suite/rpp_test_suite_audio.h
@@ -39,6 +39,7 @@ std::map<int, string> audioAugmentationMap =
     {0, "non_silent_region_detection"},
     {1, "to_decibels"},
     {2, "pre_emphasis_filter"},
+    {3, "down_mixing"},
 };
 
 // Golden outputs for Non Silent Region Detection
@@ -137,7 +138,7 @@ void verify_output(Rpp32f *dstPtr, RpptDescPtr dstDescPtr, RpptImagePatchPtr dst
     // read data from golden outputs
     Rpp64u oBufferSize = dstDescPtr->n * dstDescPtr->strides.nStride;
     Rpp32f *refOutput = static_cast<Rpp32f *>(malloc(oBufferSize * sizeof(float)));
-    string outFile = scriptPath + testCase + "/" + testCase + ".bin";
+    string outFile = scriptPath + "/../REFERENCE_OUTPUTS_AUDIO/" + testCase + "/" + testCase + ".bin";
     std::fstream fin(outFile, std::ios::in | std::ios::binary);
     if(fin.is_open())
     {