Skip to content

Commit

Permalink
Scratch buffers rename for HOST and HIP (ROCm#324)
Browse files Browse the repository at this point in the history
* Rename all uses of maskArr to scratchBufferHip (the HIP scratch buffer)

* Rename all uses of tempFloatmem to scratchBufferHost (the host scratch buffer)
  • Loading branch information
r-abishek authored and kiritigowda committed Apr 12, 2024
1 parent a00c696 commit 5a8fb2f
Show file tree
Hide file tree
Showing 13 changed files with 39 additions and 39 deletions.
4 changes: 2 additions & 2 deletions include/rppdefs.h
Original file line number Diff line number Diff line change
Expand Up @@ -788,7 +788,7 @@ typedef struct {
Rpp64u *dstBatchIndex;
Rpp32u *inc;
Rpp32u *dstInc;
Rpp32f *tempFloatmem;
Rpp32f *scratchBufferHost;
} memCPU;

#ifdef OCL_COMPILE
Expand Down Expand Up @@ -1002,7 +1002,7 @@ typedef struct
hipMemRpp8u ucharArr[10];
hipMemRpp8s charArr[10];
hipMemRpptRGB rgbArr;
hipMemRpp32f maskArr;
hipMemRpp32f scratchBufferHip;
Rpp64u* srcBatchIndex;
Rpp64u* dstBatchIndex;
Rpp32u* inc;
Expand Down
4 changes: 2 additions & 2 deletions src/modules/cl/handleocl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -368,7 +368,7 @@ struct HandleImpl
this->initHandle->mem.mcpu.ucharArr[i].ucharmem = (Rpp8u *)malloc(sizeof(Rpp8u) * this->nBatchSize);
this->initHandle->mem.mcpu.charArr[i].charmem = (Rpp8s *)malloc(sizeof(Rpp8s) * this->nBatchSize);
}
this->initHandle->mem.mcpu.tempFloatmem = (Rpp32f *)malloc(sizeof(Rpp32f) * 99532800 * this->nBatchSize); // 7680 * 4320 * 3
this->initHandle->mem.mcpu.scratchBufferHost = (Rpp32f *)malloc(sizeof(Rpp32f) * 99532800 * this->nBatchSize); // 7680 * 4320 * 3
}

void PreInitializeBuffer()
Expand Down Expand Up @@ -604,7 +604,7 @@ void Handle::rpp_destroy_object_host()
free(this->GetInitHandle()->mem.mcpu.charArr[i].charmem);
}

free(this->GetInitHandle()->mem.mcpu.tempFloatmem);
free(this->GetInitHandle()->mem.mcpu.scratchBufferHost);
}

size_t Handle::GetBatchSize() const
Expand Down
2 changes: 1 addition & 1 deletion src/modules/cpu/kernel/down_mixing.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ RppStatus down_mixing_host_tensor(Rpp32f *srcPtr,
}
else
{
Rpp32f *weights = handle.GetInitHandle()->mem.mcpu.tempFloatmem + batchCount * channels;
Rpp32f *weights = handle.GetInitHandle()->mem.mcpu.scratchBufferHost + batchCount * channels;
std::fill(weights, weights + channels, 1.f / channels);

if(normalizeWeights)
Expand Down
4 changes: 2 additions & 2 deletions src/modules/handlehost.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ struct HandleImpl
this->initHandle->mem.mcpu.maxSrcSize = (RppiSize *)malloc(sizeof(RppiSize) * this->nBatchSize);
this->initHandle->mem.mcpu.maxDstSize = (RppiSize *)malloc(sizeof(RppiSize) * this->nBatchSize);
this->initHandle->mem.mcpu.roiPoints = (RppiROI *)malloc(sizeof(RppiROI) * this->nBatchSize);
this->initHandle->mem.mcpu.tempFloatmem = (Rpp32f *)malloc(sizeof(Rpp32f) * 99532800 * this->nBatchSize); // 7680 * 4320 * 3
this->initHandle->mem.mcpu.scratchBufferHost = (Rpp32f *)malloc(sizeof(Rpp32f) * 99532800 * this->nBatchSize); // 7680 * 4320 * 3
}
};

Expand Down Expand Up @@ -77,7 +77,7 @@ void Handle::rpp_destroy_object_host()
free(this->GetInitHandle()->mem.mcpu.maxSrcSize);
free(this->GetInitHandle()->mem.mcpu.maxDstSize);
free(this->GetInitHandle()->mem.mcpu.roiPoints);
free(this->GetInitHandle()->mem.mcpu.tempFloatmem);
free(this->GetInitHandle()->mem.mcpu.scratchBufferHost);
}

size_t Handle::GetBatchSize() const
Expand Down
8 changes: 4 additions & 4 deletions src/modules/hip/handlehip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ struct HandleImpl
}

this->initHandle->mem.mcpu.rgbArr.rgbmem = (RpptRGB *)malloc(sizeof(RpptRGB) * this->nBatchSize);
this->initHandle->mem.mcpu.tempFloatmem = (Rpp32f *)malloc(sizeof(Rpp32f) * 99532800 * this->nBatchSize); // 7680 * 4320 * 3
this->initHandle->mem.mcpu.scratchBufferHost = (Rpp32f *)malloc(sizeof(Rpp32f) * 99532800 * this->nBatchSize); // 7680 * 4320 * 3
}

void PreInitializeBuffer()
Expand Down Expand Up @@ -239,7 +239,7 @@ struct HandleImpl
}

hipMalloc(&(this->initHandle->mem.mgpu.rgbArr.rgbmem), sizeof(RpptRGB) * this->nBatchSize);
hipMalloc(&(this->initHandle->mem.mgpu.maskArr.floatmem), sizeof(Rpp32f) * 8294400); // 3840 x 2160
hipMalloc(&(this->initHandle->mem.mgpu.scratchBufferHip.floatmem), sizeof(Rpp32f) * 8294400); // 3840 x 2160
}
};

Expand Down Expand Up @@ -356,7 +356,7 @@ void Handle::rpp_destroy_object_gpu()
}

hipFree(this->GetInitHandle()->mem.mgpu.rgbArr.rgbmem);
hipFree(this->GetInitHandle()->mem.mgpu.maskArr.floatmem);
hipFree(this->GetInitHandle()->mem.mgpu.scratchBufferHip.floatmem);
}

void Handle::rpp_destroy_object_host()
Expand All @@ -381,7 +381,7 @@ void Handle::rpp_destroy_object_host()
}

free(this->GetInitHandle()->mem.mcpu.rgbArr.rgbmem);
free(this->GetInitHandle()->mem.mcpu.tempFloatmem);
free(this->GetInitHandle()->mem.mcpu.scratchBufferHost);
}

size_t Handle::GetBatchSize() const
Expand Down
2 changes: 1 addition & 1 deletion src/modules/hip/kernel/gaussian_filter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1996,7 +1996,7 @@ RppStatus hip_exec_gaussian_filter_tensor(T *srcPtr,
tileSize.y = 16 - padLengthTwice;

// Create a filter of size (kernel size x kernel size)
float *filterTensor = handle.GetInitHandle()->mem.mgpu.maskArr.floatmem;
float *filterTensor = handle.GetInitHandle()->mem.mgpu.scratchBufferHip.floatmem;
hip_exec_create_gaussian_kernel(filterTensor,
kernelSize,
handle.GetInitHandle()->mem.mgpu.floatArr[0].floatmem,
Expand Down
4 changes: 2 additions & 2 deletions src/modules/hip/kernel/spatter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -241,8 +241,8 @@ RppStatus hip_exec_spatter_tensor(T *srcPtr,
Rpp32u maskSize = SPATTER_MAX_WIDTH * SPATTER_MAX_HEIGHT;
Rpp32u maskSizeFloat = maskSize * sizeof(float);
float *spatterMaskPtr, *spatterMaskInvPtr;
spatterMaskPtr = handle.GetInitHandle()->mem.mgpu.maskArr.floatmem;
spatterMaskInvPtr = handle.GetInitHandle()->mem.mgpu.maskArr.floatmem + maskSize;
spatterMaskPtr = handle.GetInitHandle()->mem.mgpu.scratchBufferHip.floatmem;
spatterMaskInvPtr = handle.GetInitHandle()->mem.mgpu.scratchBufferHip.floatmem + maskSize;
hipMemcpy(spatterMaskPtr, spatterMask, maskSizeFloat, hipMemcpyHostToDevice);
hipMemcpy(spatterMaskInvPtr, spatterMaskInv, maskSizeFloat, hipMemcpyHostToDevice);

Expand Down
6 changes: 3 additions & 3 deletions src/modules/hip/kernel/tensor_max.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -324,7 +324,7 @@ RppStatus hip_exec_tensor_max(T *srcPtr,
{
Rpp32u partialMaxArrLength = gridDim_x * gridDim_y * gridDim_z;
float *partialMaxArr;
partialMaxArr = handle.GetInitHandle()->mem.mgpu.maskArr.floatmem;
partialMaxArr = handle.GetInitHandle()->mem.mgpu.scratchBufferHip.floatmem;
hipMemsetAsync(partialMaxArr, minimum, partialMaxArrLength * sizeof(float), handle.GetStream());
hipLaunchKernelGGL(tensor_max_pln1_hip,
dim3(gridDim_x, gridDim_y, gridDim_z),
Expand All @@ -349,7 +349,7 @@ RppStatus hip_exec_tensor_max(T *srcPtr,
{
Rpp32u partialMaxArrLength = gridDim_x * gridDim_y * gridDim_z * 3;
float *partialMaxArr;
partialMaxArr = handle.GetInitHandle()->mem.mgpu.maskArr.floatmem;
partialMaxArr = handle.GetInitHandle()->mem.mgpu.scratchBufferHip.floatmem;
hipMemsetAsync(partialMaxArr, minimum, partialMaxArrLength * sizeof(float), handle.GetStream());
hipLaunchKernelGGL(tensor_max_pln3_hip,
dim3(gridDim_x, gridDim_y, gridDim_z),
Expand All @@ -374,7 +374,7 @@ RppStatus hip_exec_tensor_max(T *srcPtr,
{
Rpp32u partialMaxArrLength = gridDim_x * gridDim_y * gridDim_z * 3;
float *partialMaxArr;
partialMaxArr = handle.GetInitHandle()->mem.mgpu.maskArr.floatmem;
partialMaxArr = handle.GetInitHandle()->mem.mgpu.scratchBufferHip.floatmem;
hipMemsetAsync(partialMaxArr, minimum, partialMaxArrLength * sizeof(float), handle.GetStream());
hipLaunchKernelGGL(tensor_max_pkd3_hip,
dim3(gridDim_x, gridDim_y, gridDim_z),
Expand Down
6 changes: 3 additions & 3 deletions src/modules/hip/kernel/tensor_min.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -334,7 +334,7 @@ RppStatus hip_exec_tensor_min(T *srcPtr,
{
Rpp32u partialMinArrLength = gridDim_x * gridDim_y * gridDim_z;
float *partialMinArr;
partialMinArr = handle.GetInitHandle()->mem.mgpu.maskArr.floatmem;
partialMinArr = handle.GetInitHandle()->mem.mgpu.scratchBufferHip.floatmem;
hipMemsetAsync(partialMinArr, maximum, partialMinArrLength * sizeof(float), handle.GetStream());
hipLaunchKernelGGL(tensor_min_pln1_hip,
dim3(gridDim_x, gridDim_y, gridDim_z),
Expand All @@ -359,7 +359,7 @@ RppStatus hip_exec_tensor_min(T *srcPtr,
{
Rpp32u partialMinArrLength = gridDim_x * gridDim_y * gridDim_z * 3;
float *partialMinArr;
partialMinArr = handle.GetInitHandle()->mem.mgpu.maskArr.floatmem;
partialMinArr = handle.GetInitHandle()->mem.mgpu.scratchBufferHip.floatmem;
hipMemsetAsync(partialMinArr, maximum, partialMinArrLength * sizeof(float), handle.GetStream());
hipLaunchKernelGGL(tensor_min_pln3_hip,
dim3(gridDim_x, gridDim_y, gridDim_z),
Expand All @@ -384,7 +384,7 @@ RppStatus hip_exec_tensor_min(T *srcPtr,
{
Rpp32u partialMinArrLength = gridDim_x * gridDim_y * gridDim_z * 3;
float *partialMinArr;
partialMinArr = handle.GetInitHandle()->mem.mgpu.maskArr.floatmem;
partialMinArr = handle.GetInitHandle()->mem.mgpu.scratchBufferHip.floatmem;
hipMemsetAsync(partialMinArr, maximum, partialMinArrLength * sizeof(float), handle.GetStream());
hipLaunchKernelGGL(tensor_min_pkd3_hip,
dim3(gridDim_x, gridDim_y, gridDim_z),
Expand Down
18 changes: 9 additions & 9 deletions src/modules/hip/kernel/tensor_sum.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1152,7 +1152,7 @@ RppStatus hip_exec_tensor_sum(Rpp8u *srcPtr,
{
Rpp32u partialSumArrLength = gridDim_x * gridDim_y * gridDim_z;
Rpp32u *partialSumArr;
partialSumArr = reinterpret_cast<Rpp32u*>(handle.GetInitHandle()->mem.mgpu.maskArr.floatmem);
partialSumArr = reinterpret_cast<Rpp32u*>(handle.GetInitHandle()->mem.mgpu.scratchBufferHip.floatmem);
hipMemsetAsync(partialSumArr, 0, partialSumArrLength * sizeof(uint), handle.GetStream());
hipLaunchKernelGGL(tensor_sum_pln1_hip,
dim3(gridDim_x, gridDim_y, gridDim_z),
Expand All @@ -1177,7 +1177,7 @@ RppStatus hip_exec_tensor_sum(Rpp8u *srcPtr,
{
Rpp32u partialSumArrLength = gridDim_x * gridDim_y * gridDim_z * 3;
Rpp32u *partialSumArr;
partialSumArr = reinterpret_cast<Rpp32u*>(handle.GetInitHandle()->mem.mgpu.maskArr.floatmem);
partialSumArr = reinterpret_cast<Rpp32u*>(handle.GetInitHandle()->mem.mgpu.scratchBufferHip.floatmem);
hipMemsetAsync(partialSumArr, 0, partialSumArrLength * sizeof(Rpp32u), handle.GetStream());
hipLaunchKernelGGL(tensor_sum_pln3_hip,
dim3(gridDim_x, gridDim_y, gridDim_z),
Expand All @@ -1202,7 +1202,7 @@ RppStatus hip_exec_tensor_sum(Rpp8u *srcPtr,
{
Rpp32u partialSumArrLength = gridDim_x * gridDim_y * gridDim_z * 3;
Rpp32u *partialSumArr;
partialSumArr = reinterpret_cast<Rpp32u*>(handle.GetInitHandle()->mem.mgpu.maskArr.floatmem);
partialSumArr = reinterpret_cast<Rpp32u*>(handle.GetInitHandle()->mem.mgpu.scratchBufferHip.floatmem);
hipMemsetAsync(partialSumArr, 0, partialSumArrLength * sizeof(Rpp32u), handle.GetStream());
hipLaunchKernelGGL(tensor_sum_pkd3_hip,
dim3(gridDim_x, gridDim_y, gridDim_z),
Expand Down Expand Up @@ -1249,7 +1249,7 @@ RppStatus hip_exec_tensor_sum(Rpp8s *srcPtr,
{
Rpp32u partialSumArrLength = gridDim_x * gridDim_y * gridDim_z;
Rpp32s *partialSumArr;
partialSumArr = reinterpret_cast<Rpp32s*>(handle.GetInitHandle()->mem.mgpu.maskArr.floatmem);
partialSumArr = reinterpret_cast<Rpp32s*>(handle.GetInitHandle()->mem.mgpu.scratchBufferHip.floatmem);
hipMemsetAsync(partialSumArr, 0, partialSumArrLength * sizeof(Rpp32s), handle.GetStream());
hipLaunchKernelGGL(tensor_sum_pln1_hip,
dim3(gridDim_x, gridDim_y, gridDim_z),
Expand All @@ -1274,7 +1274,7 @@ RppStatus hip_exec_tensor_sum(Rpp8s *srcPtr,
{
Rpp32u partialSumArrLength = gridDim_x * gridDim_y * gridDim_z * 3;
Rpp32s *partialSumArr;
partialSumArr = reinterpret_cast<Rpp32s*>(handle.GetInitHandle()->mem.mgpu.maskArr.floatmem);
partialSumArr = reinterpret_cast<Rpp32s*>(handle.GetInitHandle()->mem.mgpu.scratchBufferHip.floatmem);
hipMemsetAsync(partialSumArr, 0, partialSumArrLength * sizeof(Rpp32s), handle.GetStream());
hipLaunchKernelGGL(tensor_sum_pln3_hip,
dim3(gridDim_x, gridDim_y, gridDim_z),
Expand All @@ -1299,7 +1299,7 @@ RppStatus hip_exec_tensor_sum(Rpp8s *srcPtr,
{
Rpp32u partialSumArrLength = gridDim_x * gridDim_y * gridDim_z * 3;
Rpp32s *partialSumArr;
partialSumArr = reinterpret_cast<Rpp32s*>(handle.GetInitHandle()->mem.mgpu.maskArr.floatmem);
partialSumArr = reinterpret_cast<Rpp32s*>(handle.GetInitHandle()->mem.mgpu.scratchBufferHip.floatmem);
hipMemsetAsync(partialSumArr, 0, partialSumArrLength * sizeof(Rpp32s), handle.GetStream());
hipLaunchKernelGGL(tensor_sum_pkd3_hip,
dim3(gridDim_x, gridDim_y, gridDim_z),
Expand Down Expand Up @@ -1346,7 +1346,7 @@ RppStatus hip_exec_tensor_sum(T *srcPtr,
{
Rpp32u partialSumArrLength = gridDim_x * gridDim_y * gridDim_z;
float *partialSumArr;
partialSumArr = handle.GetInitHandle()->mem.mgpu.maskArr.floatmem;
partialSumArr = handle.GetInitHandle()->mem.mgpu.scratchBufferHip.floatmem;
hipMemsetAsync(partialSumArr, 0, partialSumArrLength * sizeof(float), handle.GetStream());
hipLaunchKernelGGL(tensor_sum_pln1_hip,
dim3(gridDim_x, gridDim_y, gridDim_z),
Expand All @@ -1371,7 +1371,7 @@ RppStatus hip_exec_tensor_sum(T *srcPtr,
{
Rpp32u partialSumArrLength = gridDim_x * gridDim_y * gridDim_z * 3;
float *partialSumArr;
partialSumArr = handle.GetInitHandle()->mem.mgpu.maskArr.floatmem;
partialSumArr = handle.GetInitHandle()->mem.mgpu.scratchBufferHip.floatmem;
hipMemsetAsync(partialSumArr, 0, partialSumArrLength * sizeof(float), handle.GetStream());
hipLaunchKernelGGL(tensor_sum_pln3_hip,
dim3(gridDim_x, gridDim_y, gridDim_z),
Expand All @@ -1396,7 +1396,7 @@ RppStatus hip_exec_tensor_sum(T *srcPtr,
{
Rpp32u partialSumArrLength = gridDim_x * gridDim_y * gridDim_z * 3;
float *partialSumArr;
partialSumArr = handle.GetInitHandle()->mem.mgpu.maskArr.floatmem;
partialSumArr = handle.GetInitHandle()->mem.mgpu.scratchBufferHip.floatmem;
hipMemsetAsync(partialSumArr, 0, partialSumArrLength * sizeof(float), handle.GetStream());
hipLaunchKernelGGL(tensor_sum_pkd3_hip,
dim3(gridDim_x, gridDim_y, gridDim_z),
Expand Down
2 changes: 1 addition & 1 deletion src/modules/hip/kernel/warp_affine.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,7 @@ RppStatus hip_exec_warp_affine_tensor(T *srcPtr,
int globalThreads_y = dstDescPtr->h;
int globalThreads_z = handle.GetBatchSize();

float *affineTensorPtr = handle.GetInitHandle()->mem.mgpu.maskArr.floatmem;
float *affineTensorPtr = handle.GetInitHandle()->mem.mgpu.scratchBufferHip.floatmem;
hipMemcpy(affineTensorPtr, affineTensor, 6 * handle.GetBatchSize() * sizeof(float), hipMemcpyHostToDevice);

if (interpolationType == RpptInterpolationType::BILINEAR)
Expand Down
6 changes: 3 additions & 3 deletions src/modules/rppt_tensor_effects_augmentations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -951,7 +951,7 @@ RppStatus rppt_salt_and_pepper_noise_gpu(RppPtr_t srcPtr,
xorwowInitialState.counter = 0x64F0C9 + seed;

RpptXorwowState *d_xorwowInitialStatePtr;
d_xorwowInitialStatePtr = (RpptXorwowState *) rpp::deref(rppHandle).GetInitHandle()->mem.mgpu.maskArr.floatmem;
d_xorwowInitialStatePtr = (RpptXorwowState *) rpp::deref(rppHandle).GetInitHandle()->mem.mgpu.scratchBufferHip.floatmem;
hipMemcpy(d_xorwowInitialStatePtr, &xorwowInitialState, sizeof(RpptXorwowState), hipMemcpyHostToDevice);

if ((srcDescPtr->dataType == RpptDataType::U8) && (dstDescPtr->dataType == RpptDataType::U8))
Expand Down Expand Up @@ -1036,7 +1036,7 @@ RppStatus rppt_shot_noise_gpu(RppPtr_t srcPtr,
xorwowInitialState.boxMullerExtra = 0.0f;

RpptXorwowStateBoxMuller *d_xorwowInitialStatePtr;
d_xorwowInitialStatePtr = (RpptXorwowStateBoxMuller *) rpp::deref(rppHandle).GetInitHandle()->mem.mgpu.maskArr.floatmem;
d_xorwowInitialStatePtr = (RpptXorwowStateBoxMuller *) rpp::deref(rppHandle).GetInitHandle()->mem.mgpu.scratchBufferHip.floatmem;
hipMemcpy(d_xorwowInitialStatePtr, &xorwowInitialState, sizeof(RpptXorwowStateBoxMuller), hipMemcpyHostToDevice);

if ((srcDescPtr->dataType == RpptDataType::U8) && (dstDescPtr->dataType == RpptDataType::U8))
Expand Down Expand Up @@ -1119,7 +1119,7 @@ RppStatus rppt_gaussian_noise_gpu(RppPtr_t srcPtr,
xorwowInitialState.boxMullerExtra = 0.0f;

RpptXorwowStateBoxMuller *d_xorwowInitialStatePtr;
d_xorwowInitialStatePtr = (RpptXorwowStateBoxMuller *) rpp::deref(rppHandle).GetInitHandle()->mem.mgpu.maskArr.floatmem;
d_xorwowInitialStatePtr = (RpptXorwowStateBoxMuller *) rpp::deref(rppHandle).GetInitHandle()->mem.mgpu.scratchBufferHip.floatmem;
hipMemcpy(d_xorwowInitialStatePtr, &xorwowInitialState, sizeof(RpptXorwowStateBoxMuller), hipMemcpyHostToDevice);

if ((srcDescPtr->dataType == RpptDataType::U8) && (dstDescPtr->dataType == RpptDataType::U8))
Expand Down
12 changes: 6 additions & 6 deletions src/modules/rppt_tensor_geometric_augmentations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -526,7 +526,7 @@ RppStatus rppt_resize_host(RppPtr_t srcPtr,
srcDescPtr,
static_cast<Rpp8u*>(dstPtr) + dstDescPtr->offsetInBytes,
dstDescPtr,
rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.tempFloatmem,
rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.scratchBufferHost,
tempDescPtr,
dstImgSizes,
roiTensorPtrSrc,
Expand All @@ -541,7 +541,7 @@ RppStatus rppt_resize_host(RppPtr_t srcPtr,
srcDescPtr,
static_cast<Rpp32f*>(dstPtr) + dstDescPtr->offsetInBytes,
dstDescPtr,
rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.tempFloatmem,
rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.scratchBufferHost,
tempDescPtr,
dstImgSizes,
roiTensorPtrSrc,
Expand All @@ -556,7 +556,7 @@ RppStatus rppt_resize_host(RppPtr_t srcPtr,
srcDescPtr,
static_cast<Rpp8s*>(dstPtr) + dstDescPtr->offsetInBytes,
dstDescPtr,
rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.tempFloatmem,
rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.scratchBufferHost,
tempDescPtr,
dstImgSizes,
roiTensorPtrSrc,
Expand All @@ -571,7 +571,7 @@ RppStatus rppt_resize_host(RppPtr_t srcPtr,
srcDescPtr,
static_cast<Rpp16f*>(dstPtr) + dstDescPtr->offsetInBytes,
dstDescPtr,
rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.tempFloatmem,
rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.scratchBufferHost,
tempDescPtr,
dstImgSizes,
roiTensorPtrSrc,
Expand Down Expand Up @@ -789,7 +789,7 @@ RppStatus rppt_rotate_host(RppPtr_t srcPtr,
RppLayoutParams layoutParams = get_layout_params(srcDescPtr->layout, srcDescPtr->c);

// Compute affine transformation matrix from rotate angle
Rpp32f *affineTensor = rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.tempFloatmem;
Rpp32f *affineTensor = rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.scratchBufferHost;
for(int idx = 0; idx < srcDescPtr->n; idx++)
{
Rpp32f angleInRad = angle[idx] * PI_OVER_180;
Expand Down Expand Up @@ -1639,7 +1639,7 @@ RppStatus rppt_rotate_gpu(RppPtr_t srcPtr,
return RPP_ERROR_NOT_IMPLEMENTED;

// Compute affine transformation matrix from rotate angle
Rpp32f *affineTensor = rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.tempFloatmem;
Rpp32f *affineTensor = rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.scratchBufferHost;
for(int idx = 0; idx < srcDescPtr->n; idx++)
{
Rpp32f angleInRad = angle[idx] * PI_OVER_180;
Expand Down

0 comments on commit 5a8fb2f

Please sign in to comment.