From 5a8fb2fc18db569fd3b0da3cec0b1b2319c495d9 Mon Sep 17 00:00:00 2001 From: Abishek <52214183+r-abishek@users.noreply.github.com> Date: Tue, 2 Apr 2024 21:49:37 -0700 Subject: [PATCH] Scratch buffers rename for HOST and HIP (#324) * Change all maskArr to scratchBufferHip * Change all tempFloatmem to scratchBufferHost --- include/rppdefs.h | 4 ++-- src/modules/cl/handleocl.cpp | 4 ++-- src/modules/cpu/kernel/down_mixing.hpp | 2 +- src/modules/handlehost.cpp | 4 ++-- src/modules/hip/handlehip.cpp | 8 ++++---- src/modules/hip/kernel/gaussian_filter.hpp | 2 +- src/modules/hip/kernel/spatter.hpp | 4 ++-- src/modules/hip/kernel/tensor_max.hpp | 6 +++--- src/modules/hip/kernel/tensor_min.hpp | 6 +++--- src/modules/hip/kernel/tensor_sum.hpp | 18 +++++++++--------- src/modules/hip/kernel/warp_affine.hpp | 2 +- .../rppt_tensor_effects_augmentations.cpp | 6 +++--- .../rppt_tensor_geometric_augmentations.cpp | 12 ++++++------ 13 files changed, 39 insertions(+), 39 deletions(-) diff --git a/include/rppdefs.h b/include/rppdefs.h index 02e500fee..b12fcda78 100644 --- a/include/rppdefs.h +++ b/include/rppdefs.h @@ -788,7 +788,7 @@ typedef struct { Rpp64u *dstBatchIndex; Rpp32u *inc; Rpp32u *dstInc; - Rpp32f *tempFloatmem; + Rpp32f *scratchBufferHost; } memCPU; #ifdef OCL_COMPILE @@ -1002,7 +1002,7 @@ typedef struct hipMemRpp8u ucharArr[10]; hipMemRpp8s charArr[10]; hipMemRpptRGB rgbArr; - hipMemRpp32f maskArr; + hipMemRpp32f scratchBufferHip; Rpp64u* srcBatchIndex; Rpp64u* dstBatchIndex; Rpp32u* inc; diff --git a/src/modules/cl/handleocl.cpp b/src/modules/cl/handleocl.cpp index 87f3f73ee..2b5c60ec0 100644 --- a/src/modules/cl/handleocl.cpp +++ b/src/modules/cl/handleocl.cpp @@ -368,7 +368,7 @@ struct HandleImpl this->initHandle->mem.mcpu.ucharArr[i].ucharmem = (Rpp8u *)malloc(sizeof(Rpp8u) * this->nBatchSize); this->initHandle->mem.mcpu.charArr[i].charmem = (Rpp8s *)malloc(sizeof(Rpp8s) * this->nBatchSize); } - this->initHandle->mem.mcpu.tempFloatmem = (Rpp32f 
*)malloc(sizeof(Rpp32f) * 99532800 * this->nBatchSize); // 7680 * 4320 * 3 + this->initHandle->mem.mcpu.scratchBufferHost = (Rpp32f *)malloc(sizeof(Rpp32f) * 99532800 * this->nBatchSize); // 7680 * 4320 * 3 } void PreInitializeBuffer() @@ -604,7 +604,7 @@ void Handle::rpp_destroy_object_host() free(this->GetInitHandle()->mem.mcpu.charArr[i].charmem); } - free(this->GetInitHandle()->mem.mcpu.tempFloatmem); + free(this->GetInitHandle()->mem.mcpu.scratchBufferHost); } size_t Handle::GetBatchSize() const diff --git a/src/modules/cpu/kernel/down_mixing.hpp b/src/modules/cpu/kernel/down_mixing.hpp index 9cefc64a2..c9edb319c 100644 --- a/src/modules/cpu/kernel/down_mixing.hpp +++ b/src/modules/cpu/kernel/down_mixing.hpp @@ -53,7 +53,7 @@ RppStatus down_mixing_host_tensor(Rpp32f *srcPtr, } else { - Rpp32f *weights = handle.GetInitHandle()->mem.mcpu.tempFloatmem + batchCount * channels; + Rpp32f *weights = handle.GetInitHandle()->mem.mcpu.scratchBufferHost + batchCount * channels; std::fill(weights, weights + channels, 1.f / channels); if(normalizeWeights) diff --git a/src/modules/handlehost.cpp b/src/modules/handlehost.cpp index 510c06dd0..8c9764df0 100644 --- a/src/modules/handlehost.cpp +++ b/src/modules/handlehost.cpp @@ -47,7 +47,7 @@ struct HandleImpl this->initHandle->mem.mcpu.maxSrcSize = (RppiSize *)malloc(sizeof(RppiSize) * this->nBatchSize); this->initHandle->mem.mcpu.maxDstSize = (RppiSize *)malloc(sizeof(RppiSize) * this->nBatchSize); this->initHandle->mem.mcpu.roiPoints = (RppiROI *)malloc(sizeof(RppiROI) * this->nBatchSize); - this->initHandle->mem.mcpu.tempFloatmem = (Rpp32f *)malloc(sizeof(Rpp32f) * 99532800 * this->nBatchSize); // 7680 * 4320 * 3 + this->initHandle->mem.mcpu.scratchBufferHost = (Rpp32f *)malloc(sizeof(Rpp32f) * 99532800 * this->nBatchSize); // 7680 * 4320 * 3 } }; @@ -77,7 +77,7 @@ void Handle::rpp_destroy_object_host() free(this->GetInitHandle()->mem.mcpu.maxSrcSize); free(this->GetInitHandle()->mem.mcpu.maxDstSize); 
free(this->GetInitHandle()->mem.mcpu.roiPoints); - free(this->GetInitHandle()->mem.mcpu.tempFloatmem); + free(this->GetInitHandle()->mem.mcpu.scratchBufferHost); } size_t Handle::GetBatchSize() const diff --git a/src/modules/hip/handlehip.cpp b/src/modules/hip/handlehip.cpp index ef675ca1b..42e72db98 100644 --- a/src/modules/hip/handlehip.cpp +++ b/src/modules/hip/handlehip.cpp @@ -190,7 +190,7 @@ struct HandleImpl } this->initHandle->mem.mcpu.rgbArr.rgbmem = (RpptRGB *)malloc(sizeof(RpptRGB) * this->nBatchSize); - this->initHandle->mem.mcpu.tempFloatmem = (Rpp32f *)malloc(sizeof(Rpp32f) * 99532800 * this->nBatchSize); // 7680 * 4320 * 3 + this->initHandle->mem.mcpu.scratchBufferHost = (Rpp32f *)malloc(sizeof(Rpp32f) * 99532800 * this->nBatchSize); // 7680 * 4320 * 3 } void PreInitializeBuffer() @@ -239,7 +239,7 @@ struct HandleImpl } hipMalloc(&(this->initHandle->mem.mgpu.rgbArr.rgbmem), sizeof(RpptRGB) * this->nBatchSize); - hipMalloc(&(this->initHandle->mem.mgpu.maskArr.floatmem), sizeof(Rpp32f) * 8294400); // 3840 x 2160 + hipMalloc(&(this->initHandle->mem.mgpu.scratchBufferHip.floatmem), sizeof(Rpp32f) * 8294400); // 3840 x 2160 } }; @@ -356,7 +356,7 @@ void Handle::rpp_destroy_object_gpu() } hipFree(this->GetInitHandle()->mem.mgpu.rgbArr.rgbmem); - hipFree(this->GetInitHandle()->mem.mgpu.maskArr.floatmem); + hipFree(this->GetInitHandle()->mem.mgpu.scratchBufferHip.floatmem); } void Handle::rpp_destroy_object_host() @@ -381,7 +381,7 @@ void Handle::rpp_destroy_object_host() } free(this->GetInitHandle()->mem.mcpu.rgbArr.rgbmem); - free(this->GetInitHandle()->mem.mcpu.tempFloatmem); + free(this->GetInitHandle()->mem.mcpu.scratchBufferHost); } size_t Handle::GetBatchSize() const diff --git a/src/modules/hip/kernel/gaussian_filter.hpp b/src/modules/hip/kernel/gaussian_filter.hpp index 2a4c9cfeb..b2ae7a878 100644 --- a/src/modules/hip/kernel/gaussian_filter.hpp +++ b/src/modules/hip/kernel/gaussian_filter.hpp @@ -1996,7 +1996,7 @@ RppStatus 
hip_exec_gaussian_filter_tensor(T *srcPtr, tileSize.y = 16 - padLengthTwice; // Create a filter of size (kernel size x kernel size) - float *filterTensor = handle.GetInitHandle()->mem.mgpu.maskArr.floatmem; + float *filterTensor = handle.GetInitHandle()->mem.mgpu.scratchBufferHip.floatmem; hip_exec_create_gaussian_kernel(filterTensor, kernelSize, handle.GetInitHandle()->mem.mgpu.floatArr[0].floatmem, diff --git a/src/modules/hip/kernel/spatter.hpp b/src/modules/hip/kernel/spatter.hpp index 36c163aaa..3df78b4c3 100644 --- a/src/modules/hip/kernel/spatter.hpp +++ b/src/modules/hip/kernel/spatter.hpp @@ -241,8 +241,8 @@ RppStatus hip_exec_spatter_tensor(T *srcPtr, Rpp32u maskSize = SPATTER_MAX_WIDTH * SPATTER_MAX_HEIGHT; Rpp32u maskSizeFloat = maskSize * sizeof(float); float *spatterMaskPtr, *spatterMaskInvPtr; - spatterMaskPtr = handle.GetInitHandle()->mem.mgpu.maskArr.floatmem; - spatterMaskInvPtr = handle.GetInitHandle()->mem.mgpu.maskArr.floatmem + maskSize; + spatterMaskPtr = handle.GetInitHandle()->mem.mgpu.scratchBufferHip.floatmem; + spatterMaskInvPtr = handle.GetInitHandle()->mem.mgpu.scratchBufferHip.floatmem + maskSize; hipMemcpy(spatterMaskPtr, spatterMask, maskSizeFloat, hipMemcpyHostToDevice); hipMemcpy(spatterMaskInvPtr, spatterMaskInv, maskSizeFloat, hipMemcpyHostToDevice); diff --git a/src/modules/hip/kernel/tensor_max.hpp b/src/modules/hip/kernel/tensor_max.hpp index b47fce024..1aa17483b 100644 --- a/src/modules/hip/kernel/tensor_max.hpp +++ b/src/modules/hip/kernel/tensor_max.hpp @@ -324,7 +324,7 @@ RppStatus hip_exec_tensor_max(T *srcPtr, { Rpp32u partialMaxArrLength = gridDim_x * gridDim_y * gridDim_z; float *partialMaxArr; - partialMaxArr = handle.GetInitHandle()->mem.mgpu.maskArr.floatmem; + partialMaxArr = handle.GetInitHandle()->mem.mgpu.scratchBufferHip.floatmem; hipMemsetAsync(partialMaxArr, minimum, partialMaxArrLength * sizeof(float), handle.GetStream()); hipLaunchKernelGGL(tensor_max_pln1_hip, dim3(gridDim_x, gridDim_y, gridDim_z), @@ 
-349,7 +349,7 @@ RppStatus hip_exec_tensor_max(T *srcPtr, { Rpp32u partialMaxArrLength = gridDim_x * gridDim_y * gridDim_z * 3; float *partialMaxArr; - partialMaxArr = handle.GetInitHandle()->mem.mgpu.maskArr.floatmem; + partialMaxArr = handle.GetInitHandle()->mem.mgpu.scratchBufferHip.floatmem; hipMemsetAsync(partialMaxArr, minimum, partialMaxArrLength * sizeof(float), handle.GetStream()); hipLaunchKernelGGL(tensor_max_pln3_hip, dim3(gridDim_x, gridDim_y, gridDim_z), @@ -374,7 +374,7 @@ RppStatus hip_exec_tensor_max(T *srcPtr, { Rpp32u partialMaxArrLength = gridDim_x * gridDim_y * gridDim_z * 3; float *partialMaxArr; - partialMaxArr = handle.GetInitHandle()->mem.mgpu.maskArr.floatmem; + partialMaxArr = handle.GetInitHandle()->mem.mgpu.scratchBufferHip.floatmem; hipMemsetAsync(partialMaxArr, minimum, partialMaxArrLength * sizeof(float), handle.GetStream()); hipLaunchKernelGGL(tensor_max_pkd3_hip, dim3(gridDim_x, gridDim_y, gridDim_z), diff --git a/src/modules/hip/kernel/tensor_min.hpp b/src/modules/hip/kernel/tensor_min.hpp index a883c4f3b..6d2c050da 100644 --- a/src/modules/hip/kernel/tensor_min.hpp +++ b/src/modules/hip/kernel/tensor_min.hpp @@ -334,7 +334,7 @@ RppStatus hip_exec_tensor_min(T *srcPtr, { Rpp32u partialMinArrLength = gridDim_x * gridDim_y * gridDim_z; float *partialMinArr; - partialMinArr = handle.GetInitHandle()->mem.mgpu.maskArr.floatmem; + partialMinArr = handle.GetInitHandle()->mem.mgpu.scratchBufferHip.floatmem; hipMemsetAsync(partialMinArr, maximum, partialMinArrLength * sizeof(float), handle.GetStream()); hipLaunchKernelGGL(tensor_min_pln1_hip, dim3(gridDim_x, gridDim_y, gridDim_z), @@ -359,7 +359,7 @@ RppStatus hip_exec_tensor_min(T *srcPtr, { Rpp32u partialMinArrLength = gridDim_x * gridDim_y * gridDim_z * 3; float *partialMinArr; - partialMinArr = handle.GetInitHandle()->mem.mgpu.maskArr.floatmem; + partialMinArr = handle.GetInitHandle()->mem.mgpu.scratchBufferHip.floatmem; hipMemsetAsync(partialMinArr, maximum, partialMinArrLength * 
sizeof(float), handle.GetStream()); hipLaunchKernelGGL(tensor_min_pln3_hip, dim3(gridDim_x, gridDim_y, gridDim_z), @@ -384,7 +384,7 @@ RppStatus hip_exec_tensor_min(T *srcPtr, { Rpp32u partialMinArrLength = gridDim_x * gridDim_y * gridDim_z * 3; float *partialMinArr; - partialMinArr = handle.GetInitHandle()->mem.mgpu.maskArr.floatmem; + partialMinArr = handle.GetInitHandle()->mem.mgpu.scratchBufferHip.floatmem; hipMemsetAsync(partialMinArr, maximum, partialMinArrLength * sizeof(float), handle.GetStream()); hipLaunchKernelGGL(tensor_min_pkd3_hip, dim3(gridDim_x, gridDim_y, gridDim_z), diff --git a/src/modules/hip/kernel/tensor_sum.hpp b/src/modules/hip/kernel/tensor_sum.hpp index 03c3c3d38..6d37e894f 100644 --- a/src/modules/hip/kernel/tensor_sum.hpp +++ b/src/modules/hip/kernel/tensor_sum.hpp @@ -1152,7 +1152,7 @@ RppStatus hip_exec_tensor_sum(Rpp8u *srcPtr, { Rpp32u partialSumArrLength = gridDim_x * gridDim_y * gridDim_z; Rpp32u *partialSumArr; - partialSumArr = reinterpret_cast<Rpp32u*>(handle.GetInitHandle()->mem.mgpu.maskArr.floatmem); + partialSumArr = reinterpret_cast<Rpp32u*>(handle.GetInitHandle()->mem.mgpu.scratchBufferHip.floatmem); hipMemsetAsync(partialSumArr, 0, partialSumArrLength * sizeof(uint), handle.GetStream()); hipLaunchKernelGGL(tensor_sum_pln1_hip, dim3(gridDim_x, gridDim_y, gridDim_z), @@ -1177,7 +1177,7 @@ RppStatus hip_exec_tensor_sum(Rpp8u *srcPtr, { Rpp32u partialSumArrLength = gridDim_x * gridDim_y * gridDim_z * 3; Rpp32u *partialSumArr; - partialSumArr = reinterpret_cast<Rpp32u*>(handle.GetInitHandle()->mem.mgpu.maskArr.floatmem); + partialSumArr = reinterpret_cast<Rpp32u*>(handle.GetInitHandle()->mem.mgpu.scratchBufferHip.floatmem); hipMemsetAsync(partialSumArr, 0, partialSumArrLength * sizeof(Rpp32u), handle.GetStream()); hipLaunchKernelGGL(tensor_sum_pln3_hip, dim3(gridDim_x, gridDim_y, gridDim_z), @@ -1202,7 +1202,7 @@ RppStatus hip_exec_tensor_sum(Rpp8u *srcPtr, { Rpp32u partialSumArrLength = gridDim_x * gridDim_y * gridDim_z * 3; Rpp32u *partialSumArr; - 
partialSumArr = reinterpret_cast<Rpp32u*>(handle.GetInitHandle()->mem.mgpu.maskArr.floatmem); + partialSumArr = reinterpret_cast<Rpp32u*>(handle.GetInitHandle()->mem.mgpu.scratchBufferHip.floatmem); hipMemsetAsync(partialSumArr, 0, partialSumArrLength * sizeof(Rpp32u), handle.GetStream()); hipLaunchKernelGGL(tensor_sum_pkd3_hip, dim3(gridDim_x, gridDim_y, gridDim_z), @@ -1249,7 +1249,7 @@ RppStatus hip_exec_tensor_sum(Rpp8s *srcPtr, { Rpp32u partialSumArrLength = gridDim_x * gridDim_y * gridDim_z; Rpp32s *partialSumArr; - partialSumArr = reinterpret_cast<Rpp32s*>(handle.GetInitHandle()->mem.mgpu.maskArr.floatmem); + partialSumArr = reinterpret_cast<Rpp32s*>(handle.GetInitHandle()->mem.mgpu.scratchBufferHip.floatmem); hipMemsetAsync(partialSumArr, 0, partialSumArrLength * sizeof(Rpp32s), handle.GetStream()); hipLaunchKernelGGL(tensor_sum_pln1_hip, dim3(gridDim_x, gridDim_y, gridDim_z), @@ -1274,7 +1274,7 @@ RppStatus hip_exec_tensor_sum(Rpp8s *srcPtr, { Rpp32u partialSumArrLength = gridDim_x * gridDim_y * gridDim_z * 3; Rpp32s *partialSumArr; - partialSumArr = reinterpret_cast<Rpp32s*>(handle.GetInitHandle()->mem.mgpu.maskArr.floatmem); + partialSumArr = reinterpret_cast<Rpp32s*>(handle.GetInitHandle()->mem.mgpu.scratchBufferHip.floatmem); hipMemsetAsync(partialSumArr, 0, partialSumArrLength * sizeof(Rpp32s), handle.GetStream()); hipLaunchKernelGGL(tensor_sum_pln3_hip, dim3(gridDim_x, gridDim_y, gridDim_z), @@ -1299,7 +1299,7 @@ RppStatus hip_exec_tensor_sum(Rpp8s *srcPtr, { Rpp32u partialSumArrLength = gridDim_x * gridDim_y * gridDim_z * 3; Rpp32s *partialSumArr; - partialSumArr = reinterpret_cast<Rpp32s*>(handle.GetInitHandle()->mem.mgpu.maskArr.floatmem); + partialSumArr = reinterpret_cast<Rpp32s*>(handle.GetInitHandle()->mem.mgpu.scratchBufferHip.floatmem); hipMemsetAsync(partialSumArr, 0, partialSumArrLength * sizeof(Rpp32s), handle.GetStream()); hipLaunchKernelGGL(tensor_sum_pkd3_hip, dim3(gridDim_x, gridDim_y, gridDim_z), @@ -1346,7 +1346,7 @@ RppStatus hip_exec_tensor_sum(T *srcPtr, { Rpp32u partialSumArrLength = gridDim_x * 
gridDim_y * gridDim_z; float *partialSumArr; - partialSumArr = handle.GetInitHandle()->mem.mgpu.maskArr.floatmem; + partialSumArr = handle.GetInitHandle()->mem.mgpu.scratchBufferHip.floatmem; hipMemsetAsync(partialSumArr, 0, partialSumArrLength * sizeof(float), handle.GetStream()); hipLaunchKernelGGL(tensor_sum_pln1_hip, dim3(gridDim_x, gridDim_y, gridDim_z), @@ -1371,7 +1371,7 @@ RppStatus hip_exec_tensor_sum(T *srcPtr, { Rpp32u partialSumArrLength = gridDim_x * gridDim_y * gridDim_z * 3; float *partialSumArr; - partialSumArr = handle.GetInitHandle()->mem.mgpu.maskArr.floatmem; + partialSumArr = handle.GetInitHandle()->mem.mgpu.scratchBufferHip.floatmem; hipMemsetAsync(partialSumArr, 0, partialSumArrLength * sizeof(float), handle.GetStream()); hipLaunchKernelGGL(tensor_sum_pln3_hip, dim3(gridDim_x, gridDim_y, gridDim_z), @@ -1396,7 +1396,7 @@ RppStatus hip_exec_tensor_sum(T *srcPtr, { Rpp32u partialSumArrLength = gridDim_x * gridDim_y * gridDim_z * 3; float *partialSumArr; - partialSumArr = handle.GetInitHandle()->mem.mgpu.maskArr.floatmem; + partialSumArr = handle.GetInitHandle()->mem.mgpu.scratchBufferHip.floatmem; hipMemsetAsync(partialSumArr, 0, partialSumArrLength * sizeof(float), handle.GetStream()); hipLaunchKernelGGL(tensor_sum_pkd3_hip, dim3(gridDim_x, gridDim_y, gridDim_z), diff --git a/src/modules/hip/kernel/warp_affine.hpp b/src/modules/hip/kernel/warp_affine.hpp index 7fd43fb53..8de7a6697 100644 --- a/src/modules/hip/kernel/warp_affine.hpp +++ b/src/modules/hip/kernel/warp_affine.hpp @@ -329,7 +329,7 @@ RppStatus hip_exec_warp_affine_tensor(T *srcPtr, int globalThreads_y = dstDescPtr->h; int globalThreads_z = handle.GetBatchSize(); - float *affineTensorPtr = handle.GetInitHandle()->mem.mgpu.maskArr.floatmem; + float *affineTensorPtr = handle.GetInitHandle()->mem.mgpu.scratchBufferHip.floatmem; hipMemcpy(affineTensorPtr, affineTensor, 6 * handle.GetBatchSize() * sizeof(float), hipMemcpyHostToDevice); if (interpolationType == 
RpptInterpolationType::BILINEAR) diff --git a/src/modules/rppt_tensor_effects_augmentations.cpp b/src/modules/rppt_tensor_effects_augmentations.cpp index 706040d17..b7f4d3aa9 100644 --- a/src/modules/rppt_tensor_effects_augmentations.cpp +++ b/src/modules/rppt_tensor_effects_augmentations.cpp @@ -951,7 +951,7 @@ RppStatus rppt_salt_and_pepper_noise_gpu(RppPtr_t srcPtr, xorwowInitialState.counter = 0x64F0C9 + seed; RpptXorwowState *d_xorwowInitialStatePtr; - d_xorwowInitialStatePtr = (RpptXorwowState *) rpp::deref(rppHandle).GetInitHandle()->mem.mgpu.maskArr.floatmem; + d_xorwowInitialStatePtr = (RpptXorwowState *) rpp::deref(rppHandle).GetInitHandle()->mem.mgpu.scratchBufferHip.floatmem; hipMemcpy(d_xorwowInitialStatePtr, &xorwowInitialState, sizeof(RpptXorwowState), hipMemcpyHostToDevice); if ((srcDescPtr->dataType == RpptDataType::U8) && (dstDescPtr->dataType == RpptDataType::U8)) @@ -1036,7 +1036,7 @@ RppStatus rppt_shot_noise_gpu(RppPtr_t srcPtr, xorwowInitialState.boxMullerExtra = 0.0f; RpptXorwowStateBoxMuller *d_xorwowInitialStatePtr; - d_xorwowInitialStatePtr = (RpptXorwowStateBoxMuller *) rpp::deref(rppHandle).GetInitHandle()->mem.mgpu.maskArr.floatmem; + d_xorwowInitialStatePtr = (RpptXorwowStateBoxMuller *) rpp::deref(rppHandle).GetInitHandle()->mem.mgpu.scratchBufferHip.floatmem; hipMemcpy(d_xorwowInitialStatePtr, &xorwowInitialState, sizeof(RpptXorwowStateBoxMuller), hipMemcpyHostToDevice); if ((srcDescPtr->dataType == RpptDataType::U8) && (dstDescPtr->dataType == RpptDataType::U8)) @@ -1119,7 +1119,7 @@ RppStatus rppt_gaussian_noise_gpu(RppPtr_t srcPtr, xorwowInitialState.boxMullerExtra = 0.0f; RpptXorwowStateBoxMuller *d_xorwowInitialStatePtr; - d_xorwowInitialStatePtr = (RpptXorwowStateBoxMuller *) rpp::deref(rppHandle).GetInitHandle()->mem.mgpu.maskArr.floatmem; + d_xorwowInitialStatePtr = (RpptXorwowStateBoxMuller *) rpp::deref(rppHandle).GetInitHandle()->mem.mgpu.scratchBufferHip.floatmem; hipMemcpy(d_xorwowInitialStatePtr, &xorwowInitialState, 
sizeof(RpptXorwowStateBoxMuller), hipMemcpyHostToDevice); if ((srcDescPtr->dataType == RpptDataType::U8) && (dstDescPtr->dataType == RpptDataType::U8)) diff --git a/src/modules/rppt_tensor_geometric_augmentations.cpp b/src/modules/rppt_tensor_geometric_augmentations.cpp index 178ce2323..c7de67b8e 100644 --- a/src/modules/rppt_tensor_geometric_augmentations.cpp +++ b/src/modules/rppt_tensor_geometric_augmentations.cpp @@ -526,7 +526,7 @@ RppStatus rppt_resize_host(RppPtr_t srcPtr, srcDescPtr, static_cast(dstPtr) + dstDescPtr->offsetInBytes, dstDescPtr, - rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.tempFloatmem, + rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.scratchBufferHost, tempDescPtr, dstImgSizes, roiTensorPtrSrc, @@ -541,7 +541,7 @@ RppStatus rppt_resize_host(RppPtr_t srcPtr, srcDescPtr, static_cast(dstPtr) + dstDescPtr->offsetInBytes, dstDescPtr, - rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.tempFloatmem, + rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.scratchBufferHost, tempDescPtr, dstImgSizes, roiTensorPtrSrc, @@ -556,7 +556,7 @@ RppStatus rppt_resize_host(RppPtr_t srcPtr, srcDescPtr, static_cast(dstPtr) + dstDescPtr->offsetInBytes, dstDescPtr, - rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.tempFloatmem, + rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.scratchBufferHost, tempDescPtr, dstImgSizes, roiTensorPtrSrc, @@ -571,7 +571,7 @@ RppStatus rppt_resize_host(RppPtr_t srcPtr, srcDescPtr, static_cast(dstPtr) + dstDescPtr->offsetInBytes, dstDescPtr, - rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.tempFloatmem, + rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.scratchBufferHost, tempDescPtr, dstImgSizes, roiTensorPtrSrc, @@ -789,7 +789,7 @@ RppStatus rppt_rotate_host(RppPtr_t srcPtr, RppLayoutParams layoutParams = get_layout_params(srcDescPtr->layout, srcDescPtr->c); // Compute affine transformation matrix from rotate angle - Rpp32f *affineTensor = rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.tempFloatmem; + Rpp32f *affineTensor = 
rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.scratchBufferHost; for(int idx = 0; idx < srcDescPtr->n; idx++) { Rpp32f angleInRad = angle[idx] * PI_OVER_180; @@ -1639,7 +1639,7 @@ RppStatus rppt_rotate_gpu(RppPtr_t srcPtr, return RPP_ERROR_NOT_IMPLEMENTED; // Compute affine transformation matrix from rotate angle - Rpp32f *affineTensor = rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.tempFloatmem; + Rpp32f *affineTensor = rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.scratchBufferHost; for(int idx = 0; idx < srcDescPtr->n; idx++) { Rpp32f angleInRad = angle[idx] * PI_OVER_180;