diff --git a/src/include/cpu/rpp_cpu_simd.hpp b/src/include/cpu/rpp_cpu_simd.hpp index 381c33635..a5e83d1ff 100644 --- a/src/include/cpu/rpp_cpu_simd.hpp +++ b/src/include/cpu/rpp_cpu_simd.hpp @@ -3085,12 +3085,6 @@ inline void rpp_store24_f32pln3_to_u8pkd3_avx(Rpp8u* dstPtr, __m256* p) _mm256_storeu_si256((__m256i *)(dstPtr), px1); /* store the 24 U8 pixels in dst */ } -inline void rpp_store8_u8pln1_to_u8pln1_avx(Rpp8u* dstPtr, __m256i &p) -{ - __m128i pTemp = _mm256_castsi256_si128(p); - rpp_storeu_si64((__m128i *)(dstPtr), pTemp); -} - inline void rpp_store8_f32pln1_to_u8pln1_avx(Rpp8u* dstPtr, __m256 &p) { __m256i px1 = _mm256_permute4x64_epi64(_mm256_packus_epi32(_mm256_cvtps_epi32(p), avx_px0), _MM_SHUFFLE(3,1,2,0)); @@ -3256,12 +3250,6 @@ inline void rpp_store24_f32pln3_to_i8pkd3_avx(Rpp8s* dstPtr, __m256* p) _mm256_storeu_si256((__m256i *)(dstPtr), px1); /* store the 12 U8 pixels in dst */ } -inline void rpp_store8_i8pln1_to_i8pln1_avx(Rpp8s* dstPtr, __m256i &p) -{ - __m128i pTemp = _mm256_castsi256_si128(p); - rpp_storeu_si64((__m128i *)(dstPtr), pTemp); -} - inline void rpp_store8_f32pln1_to_i8pln1_avx(Rpp8s* dstPtr, __m256 &p) { __m256i px1 = _mm256_permute4x64_epi64(_mm256_packus_epi32(_mm256_cvtps_epi32(p), avx_px0), _MM_SHUFFLE(3,1,2,0)); diff --git a/src/modules/cpu/kernel/water.hpp b/src/modules/cpu/kernel/water.hpp index f12647190..4aa43602a 100644 --- a/src/modules/cpu/kernel/water.hpp +++ b/src/modules/cpu/kernel/water.hpp @@ -281,7 +281,7 @@ RppStatus water_u8_u8_host_tensor(Rpp8u *srcPtr, { __m256i pRow; rpp_simd_load(rpp_generic_nn_load_u8pln1_avx, srcPtrTempChn, srcLocArray, invalidLoad, pRow); - rpp_simd_store(rpp_store8_u8pln1_to_u8pln1_avx, dstPtrTempChn, pRow); + rpp_storeu_si64((__m128i *)(dstPtrTempChn), _mm256_castsi256_si128(pRow)); srcPtrTempChn += srcDescPtr->strides.cStride; dstPtrTempChn += dstDescPtr->strides.cStride; } @@ -965,7 +965,7 @@ RppStatus water_i8_i8_host_tensor(Rpp8s *srcPtr, { __m256i pRow; rpp_simd_load(rpp_generic_nn_load_i8pln1_avx, srcPtrTempChn, srcLocArray, invalidLoad, pRow); - rpp_simd_store(rpp_store8_i8pln1_to_i8pln1_avx, dstPtrTempChn, pRow); + rpp_storeu_si64((__m128i *)(dstPtrTempChn), _mm256_castsi256_si128(pRow)); srcPtrTempChn += srcDescPtr->strides.cStride; dstPtrTempChn += dstDescPtr->strides.cStride; }