Skip to content

Commit

Permalink
Adding openvx changes for downmix node
Browse files Browse the repository at this point in the history
  • Loading branch information
SundarRajan28 committed Mar 13, 2024
1 parent 1e89c02 commit d53f81d
Show file tree
Hide file tree
Showing 7 changed files with 261 additions and 7 deletions.
1 change: 1 addition & 0 deletions amd_openvx_extensions/amd_rpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ list(APPEND SOURCES
source/tensor/WarpAffine.cpp
source/tensor/SequenceRearrange.cpp
source/tensor/PreemphasisFilter.cpp
source/tensor/Downmix.cpp
source/kernel_rpp.cpp
source/internal_publishKernels.cpp
)
Expand Down
16 changes: 9 additions & 7 deletions amd_openvx_extensions/amd_rpp/include/internal_publishKernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,7 @@ vx_status Vignette_Register(vx_context);
vx_status WarpAffine_Register(vx_context);
vx_status SequenceRearrange_Register(vx_context);
vx_status PreemphasisFilter_Register(vx_context);
vx_status Downmix_Register(vx_context);

// kernel names
#define VX_KERNEL_RPP_NOPBATCHPD_NAME "org.rpp.NopbatchPD"
Expand Down Expand Up @@ -274,12 +275,13 @@ vx_status PreemphasisFilter_Register(vx_context);
#define VX_KERNEL_RPP_PIXELATE_NAME "org.rpp.Pixelate"
#define VX_KERNEL_RPP_VIGNETTE_NAME "org.rpp.Vignette"
#define VX_KERNEL_RPP_WARPAFFINE_NAME "org.rpp.WarpAffine"
#define VX_KERNEL_RPP_BRIGHTNESS_NAME "org.rpp.Brightness"
#define VX_KERNEL_RPP_COPY_NAME "org.rpp.Copy"
#define VX_KERNEL_RPP_CROPMIRRORNORMALIZE_NAME "org.rpp.CropMirrorNormalize"
#define VX_KERNEL_RPP_NOP_NAME "org.rpp.Nop"
#define VX_KERNEL_RPP_RESIZE_NAME "org.rpp.Resize"
#define VX_KERNEL_RPP_SEQUENCEREARRANGE_NAME "org.rpp.SequenceRearrange"
#define VX_KERNEL_RPP_PREEMPHASISFILTER_NAME "org.rpp.PreemphasisFilter"
#define VX_KERNEL_RPP_BRIGHTNESS_NAME "org.rpp.Brightness"
#define VX_KERNEL_RPP_COPY_NAME "org.rpp.Copy"
#define VX_KERNEL_RPP_CROPMIRRORNORMALIZE_NAME "org.rpp.CropMirrorNormalize"
#define VX_KERNEL_RPP_NOP_NAME "org.rpp.Nop"
#define VX_KERNEL_RPP_RESIZE_NAME "org.rpp.Resize"
#define VX_KERNEL_RPP_SEQUENCEREARRANGE_NAME "org.rpp.SequenceRearrange"
#define VX_KERNEL_RPP_PREEMPHASISFILTER_NAME "org.rpp.PreemphasisFilter"
#define VX_KERNEL_RPP_DOWNMIX_NAME "org.rpp.Downmix"

#endif //_AMDVX_EXT__PUBLISH_KERNELS_H_
1 change: 1 addition & 0 deletions amd_openvx_extensions/amd_rpp/include/kernels_rpp.h
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ extern "C"
VX_KERNEL_RPP_VIGNETTE = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x72,
VX_KERNEL_RPP_WARPAFFINE = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x73,
VX_KERNEL_RPP_PREEMPHASISFILTER = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x74
VX_KERNEL_RPP_DOWNMIX = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x75,
};

#ifdef __cplusplus
Expand Down
9 changes: 9 additions & 0 deletions amd_openvx_extensions/amd_rpp/include/vx_ext_rpp.h
Original file line number Diff line number Diff line change
Expand Up @@ -1876,6 +1876,15 @@ extern "C"
* \return A node reference <tt>\ref vx_node</tt>. Any possible errors preventing a successful creation should be checked using <tt>\ref vxGetStatus</tt>.
*/
SHARED_PUBLIC vx_node VX_API_CALL vxExtRppPreemphasisFilter(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_tensor pDstRoi, vx_array preemphCoeff, vx_scalar borderType);
/*! \brief [Graph] Applies downmixing to the input tensor.
 * \ingroup group_amd_rpp
 * \param [in] graph The handle to the graph.
 * \param [in] pSrc The input tensor in <tt>\ref VX_TYPE_UINT8</tt> or <tt>\ref VX_TYPE_FLOAT32</tt> or <tt>\ref VX_TYPE_FLOAT16</tt> or <tt>\ref VX_TYPE_INT8</tt> format data.
 * \param [out] pDst The output tensor in <tt>\ref VX_TYPE_UINT8</tt> or <tt>\ref VX_TYPE_FLOAT32</tt> or <tt>\ref VX_TYPE_FLOAT16</tt> or <tt>\ref VX_TYPE_INT8</tt> format data.
 * \param [in] srcRoi The input tensor of batch size in <tt>unsigned int</tt> containing the roi values for the input in xywh/ltrb format.
 * \return A node reference <tt>\ref vx_node</tt>. Any possible errors preventing a successful creation should be checked using <tt>\ref vxGetStatus</tt>.
 */
SHARED_PUBLIC vx_node VX_API_CALL vxExtRppDownmix(vx_graph graph, vx_tensor pSrc, vx_tensor pDst, vx_tensor srcRoi);
#ifdef __cplusplus
}
#endif
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ vx_status get_kernels_to_publish()
STATUS_ERROR_CHECK(ADD_KERNEL(Vignette_Register));
STATUS_ERROR_CHECK(ADD_KERNEL(WarpAffine_Register));
STATUS_ERROR_CHECK(ADD_KERNEL(PreemphasisFilter_Register));
STATUS_ERROR_CHECK(ADD_KERNEL(Downmix_Register));

return status;
}
Expand Down
18 changes: 18 additions & 0 deletions amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2561,6 +2561,24 @@ VX_API_ENTRY vx_node VX_API_CALL vxExtRppPreemphasisFilter(vx_graph graph, vx_te
return node;
}

// Creates a Downmix node in the graph. Wraps the user kernel VX_KERNEL_RPP_DOWNMIX
// with parameters (pSrc, pDst, srcRoi, devType), where devType is a scalar carrying
// the graph's target affinity so the kernel callbacks can pick the CPU/GPU path.
// Returns NULL on failure; callers should check the node with vxGetStatus.
VX_API_ENTRY vx_node VX_API_CALL vxExtRppDownmix(vx_graph graph, vx_tensor pSrc, vx_tensor pDst, vx_tensor srcRoi)
{
    vx_node node = NULL;
    vx_context context = vxGetContext((vx_reference)graph);
    if (vxGetStatus((vx_reference)context) == VX_SUCCESS)
    {
        vx_uint32 dev_type = getGraphAffinity(graph);
        // Reuse the context fetched above instead of re-querying it from the graph.
        vx_scalar devType = vxCreateScalar(context, VX_TYPE_UINT32, &dev_type);
        vx_reference params[] = {
            (vx_reference)pSrc,
            (vx_reference)pDst,
            (vx_reference)srcRoi,
            (vx_reference)devType};
        node = createNode(graph, VX_KERNEL_RPP_DOWNMIX, params, 4);
    }
    return node;
}

RpptDataType getRpptDataType(vx_enum vxDataType) {
switch(vxDataType) {
case vx_type_e::VX_TYPE_FLOAT32:
Expand Down
222 changes: 222 additions & 0 deletions amd_openvx_extensions/amd_rpp/source/tensor/Downmix.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,222 @@
/*
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/

#include "internal_publishKernels.h"
#include "vx_ext_amd.h"

// Per-node state for the Downmix kernel, stored via VX_NODE_LOCAL_DATA_PTR.
// Allocated in initializeDownmix and released in uninitializeDownmix.
struct DownmixLocalData
{
    vxRppHandle *handle;                        // RPP backend handle created per node
    Rpp32u device_type;                         // AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU
    RppPtr_t pSrc;                              // host buffer of the input tensor (refreshed each execution)
    RppPtr_t pDst;                              // host buffer of the output tensor (refreshed each execution)
    vx_int32 *pSamples;                         // per-batch sample counts, unpacked from the src ROI tensor
    vx_int32 *pChannels;                        // per-batch channel counts, unpacked from the src ROI tensor
    RpptDescPtr pSrcDesc;                       // RPP descriptor for the input tensor
    RpptDescPtr pDstDesc;                       // RPP descriptor for the output tensor
    RpptDesc srcDesc;                           // NOTE(review): unused — pSrcDesc is heap-allocated instead; confirm and remove
    RpptDesc dstDesc;                           // NOTE(review): unused — pDstDesc is heap-allocated instead; confirm and remove
    size_t inputTensorDims[RPP_MAX_TENSOR_DIMS];   // input tensor dims; [0] is the batch size
    size_t outputTensorDims[RPP_MAX_TENSOR_DIMS];  // output tensor dims
};

// Re-queries the host buffers for the src/dst tensors and unpacks the source ROI
// tensor into the per-batch pSamples/pChannels arrays consumed by the RPP call.
// Called before every execution because the tensor buffers can be re-bound.
// Returns VX_ERROR_NOT_IMPLEMENTED for the GPU affinity (no HIP kernel yet).
static vx_status VX_CALLBACK refreshDownmix(vx_node node, const vx_reference *parameters, vx_uint32 num, DownmixLocalData *data) {
    vx_status status = VX_SUCCESS;
    // Initialize so we never dereference an indeterminate pointer if neither
    // affinity branch below fills it in.
    void *roi_tensor_ptr_src = NULL;
    if (data->device_type == AGO_TARGET_AFFINITY_GPU)
    {
        // Fix: the closing brace of this if-block previously sat inside the
        // #if ENABLE_HIP region, which broke compilation when HIP was disabled.
#if ENABLE_HIP
        return VX_ERROR_NOT_IMPLEMENTED;
#endif
    }
    if (data->device_type == AGO_TARGET_AFFINITY_CPU)
    {
        STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HOST, &data->pSrc, sizeof(data->pSrc)));
        STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HOST, &data->pDst, sizeof(data->pDst)));
        STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HOST, &roi_tensor_ptr_src, sizeof(roi_tensor_ptr_src)));
    }
    if (roi_tensor_ptr_src != NULL)
    {
        // The audio ROI reuses the xywh layout: x carries the sample count and
        // y the channel count for each batch entry.
        RpptROI *src_roi = reinterpret_cast<RpptROI *>(roi_tensor_ptr_src);
        for (size_t n = 0; n < data->inputTensorDims[0]; n++) {
            data->pSamples[n] = src_roi[n].xywhROI.xy.x;
            data->pChannels[n] = src_roi[n].xywhROI.xy.y;
        }
    }
    return status;
}

// Graph-verification callback: checks parameter types/ranks and propagates the
// output tensor's metadata (dims, dtype, fixed-point position) to metas[1].
static vx_status VX_CALLBACK validateDownmix(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) {
    vx_status status = VX_SUCCESS;

    // Parameter #3 must be the device-affinity scalar (VX_TYPE_UINT32).
    vx_enum scalar_type;
    STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
    if (scalar_type != VX_TYPE_UINT32)
        return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #3 type=%d (must be VX_TYPE_UINT32)\n", scalar_type);

    // Input tensor must carry at least (batch, samples, channels).
    size_t num_tensor_dims;
    STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
    if(num_tensor_dims < 3) return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: Downmix: tensor: #0 dimensions=%lu (must be greater than or equal to 3)\n", num_tensor_dims);

    // Output tensor rank check.
    STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
    if(num_tensor_dims < 3) return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: Downmix: tensor: #1 dimensions=%lu (must be greater than or equal to 3)\n", num_tensor_dims);

    // Mirror the output tensor's attributes into its meta format so the
    // framework can validate/allocate the virtual output.
    vx_uint8 tensor_fixed_point_position;
    size_t tensor_dims[RPP_MAX_TENSOR_DIMS];
    vx_enum tensor_datatype;

    STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
    STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_DATA_TYPE, &tensor_datatype, sizeof(tensor_datatype)));
    STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
    STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
    STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
    STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_TENSOR_DATA_TYPE, &tensor_datatype, sizeof(tensor_datatype)));
    STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
    return status;
}

// Execution callback: refreshes buffer pointers/ROIs, then runs the RPP host
// downmix kernel. GPU affinity is reported as not implemented.
static vx_status VX_CALLBACK processDownmix(vx_node node, const vx_reference *parameters, vx_uint32 num) {
    RppStatus rpp_status = RPP_SUCCESS;
    vx_status return_status = VX_SUCCESS;
    DownmixLocalData *data = NULL;
    STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
    if (data->device_type == AGO_TARGET_AFFINITY_GPU)
    {
        // Fix: the closing brace of this if-block previously sat inside the
        // #if ENABLE_HIP region, which broke compilation when HIP was disabled.
#if ENABLE_HIP
        return_status = VX_ERROR_NOT_IMPLEMENTED;  // no HIP kernel for downmix yet
#endif
    }
    if (data->device_type == AGO_TARGET_AFFINITY_CPU)
    {
        refreshDownmix(node, parameters, num, data);
        rpp_status = rppt_down_mixing_host((float *)data->pSrc, data->pSrcDesc, (float *)data->pDst, data->pDstDesc, data->pSamples, data->pChannels, false, data->handle->rppHandle);
        return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
    }
    return return_status;
}

// Node-initialize callback: allocates and populates DownmixLocalData (RPP
// descriptors, per-batch sample/channel arrays, RPP handle) and stores it in
// VX_NODE_LOCAL_DATA_PTR for process/uninitialize to use.
static vx_status VX_CALLBACK initializeDownmix(vx_node node, const vx_reference *parameters, vx_uint32 num) {
    DownmixLocalData *data = new DownmixLocalData;
    memset(data, 0, sizeof(DownmixLocalData));

    vx_enum input_tensor_datatype, output_tensor_datatype;

    // Parameter #3 carries the CPU/GPU affinity chosen at node-creation time.
    STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[3], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));

    // Querying for input tensor
    data->pSrcDesc = new RpptDesc;
    STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &data->pSrcDesc->numDims, sizeof(data->pSrcDesc->numDims)));
    STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DIMS, &data->inputTensorDims, sizeof(vx_size) * data->pSrcDesc->numDims));
    STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DATA_TYPE, &input_tensor_datatype, sizeof(input_tensor_datatype)));
    data->pSrcDesc->dataType = getRpptDataType(input_tensor_datatype);
    data->pSrcDesc->offsetInBytes = 0;
    // Derives n/strides etc. for an audio layout from the raw dims.
    fillAudioDescriptionPtrFromDims(data->pSrcDesc, data->inputTensorDims);

    // Querying for output tensor
    data->pDstDesc = new RpptDesc;
    STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_NUMBER_OF_DIMS, &data->pDstDesc->numDims, sizeof(data->pDstDesc->numDims)));
    STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_DIMS, &data->outputTensorDims, sizeof(vx_size) * data->pDstDesc->numDims));
    STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1],VX_TENSOR_DATA_TYPE, &output_tensor_datatype, sizeof(output_tensor_datatype)));
    data->pDstDesc->dataType = getRpptDataType(output_tensor_datatype);
    data->pDstDesc->offsetInBytes = 0;
    fillAudioDescriptionPtrFromDims(data->pDstDesc, data->outputTensorDims);

    // One samples/channels entry per batch element (pSrcDesc->n = batch size).
    data->pSamples = new vx_int32[data->pSrcDesc->n];
    data->pChannels = new vx_int32[data->pSrcDesc->n];

    // Prime the buffer pointers and ROI-derived arrays before first execution.
    refreshDownmix(node, parameters, num, data);
    STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->pSrcDesc->n, data->device_type));

    STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
    return VX_SUCCESS;
}

// Node-uninitialize callback: releases the RPP handle and frees everything
// allocated in initializeDownmix.
static vx_status VX_CALLBACK uninitializeDownmix(vx_node node, const vx_reference *parameters, vx_uint32 num) {
    DownmixLocalData *data;
    STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
    STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type));
    delete[] data->pSamples;   // fix: allocated with new[], so delete[] (delete was UB)
    delete[] data->pChannels;  // fix: allocated with new[], so delete[]
    delete data->pSrcDesc;     // fix: descriptors from initializeDownmix were leaked
    delete data->pDstDesc;
    delete data;
    return VX_SUCCESS;
}

//! \brief The kernel target support callback.
// TODO: the node currently adopts the context-wide affinity; revisit once
// hybrid CPU/GPU modes within a single graph are supported.
static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node,
        vx_bool use_opencl_1_2,              // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2
        vx_uint32 &supported_target_affinity // [output] AGO_TARGET_AFFINITY_CPU and/or AGO_TARGET_AFFINITY_GPU
) {
    AgoTargetAffinityInfo affinity;
    vxQueryContext(vxGetContext((vx_reference)graph), VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
    // Report GPU support only when the context itself is GPU-affine; otherwise CPU.
    supported_target_affinity = (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
                                    ? AGO_TARGET_AFFINITY_GPU
                                    : AGO_TARGET_AFFINITY_CPU;
    // NOTE: affinity is hardcoded to CPU for the OpenCL backend elsewhere to avoid
    // VerifyGraph failure, since amd_rpp nodes have no codegen callback.
    return VX_SUCCESS;
}

// Registers the org.rpp.Downmix user kernel with the context: adds the kernel
// with its process/validate/initialize/uninitialize callbacks, wires the
// target-support query, declares its 4 parameters, and finalizes it.
vx_status Downmix_Register(vx_context context) {
    vx_status status = VX_SUCCESS;
    // Add kernel to the context with callbacks
    vx_kernel kernel = vxAddUserKernel(context, "org.rpp.Downmix",
                                       VX_KERNEL_RPP_DOWNMIX,
                                       processDownmix,
                                       4,
                                       validateDownmix,
                                       initializeDownmix,
                                       uninitializeDownmix);
    ERROR_CHECK_OBJECT(kernel);
    AgoTargetAffinityInfo affinity;
    vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
#if ENABLE_HIP
    // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers
    vx_bool enableBufferAccess = vx_true_e;
    if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
        STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess)));
#else
    vx_bool enableBufferAccess = vx_false_e;
#endif
    amd_kernel_query_target_support_f query_target_support_f = query_target_support;

    if (kernel)
    {
        STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f)));
        // Parameters: 0=pSrc (input tensor), 1=pDst (output tensor),
        // 2=srcRoi (input ROI tensor), 3=devType (affinity scalar).
        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
        // PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
        PARAM_ERROR_CHECK(vxFinalizeKernel(kernel));
    }
    // NOTE(review): the STATUS/PARAM_ERROR_CHECK macros presumably `goto exit`
    // and/or set `status` on failure — confirm against their definitions; the
    // `exit:` label below is function-scoped, so jumps from above land here.
    if (status != VX_SUCCESS)
    {
    exit:
        vxRemoveKernel(kernel);
        return VX_FAILURE;
    }

    return status;
}

0 comments on commit d53f81d

Please sign in to comment.