From 5ffdf19c00716a555fa69ae4333f882b6ab35827 Mon Sep 17 00:00:00 2001 From: Digant Desai Date: Mon, 14 Oct 2024 21:44:10 -0700 Subject: [PATCH 1/2] [xnnpack] Add qp8 XNNP flag only for qb4w convert For testing, enable Kleidi using the following changes and run the llama models on Android with 8da4w and 8da8w linears. ``` -DEXECUTORCH_BUILD_XNNPACK=ON \ + -DEXECUTORCH_XNNPACK_ENABLE_KLEIDI=ON \ ``` --- backends/xnnpack/runtime/XNNCompiler.cpp | 267 +++++++++++++++++++---- 1 file changed, 219 insertions(+), 48 deletions(-) diff --git a/backends/xnnpack/runtime/XNNCompiler.cpp b/backends/xnnpack/runtime/XNNCompiler.cpp index 1080da0beae..b948aa8623d 100644 --- a/backends/xnnpack/runtime/XNNCompiler.cpp +++ b/backends/xnnpack/runtime/XNNCompiler.cpp @@ -56,7 +56,8 @@ using DataType = fb_xnnpack::XNNDatatype; using DefineNodeFunc = Error (*)( xnn_subgraph_t, const std::unordered_map&, - NodePtr) noexcept; + NodePtr, + const fb_xnnpack::XNNGraph*) noexcept; /* Convert a tensor from fp32 to bf16. 
@@ -512,6 +513,8 @@ Error defineTensor( return Error::Ok; }; +#define MAYBE_UNUSED(x) (void)(x) + /* Define serialized add node into the subgraph, using the remapped ids to map the serialized ids, to the new ids generated when defining @@ -520,7 +523,10 @@ the tensor value Error defineAddNode( xnn_subgraph_t subgraph_ptr, const std::unordered_map& remapped_ids, - const NodePtr node) noexcept { + const NodePtr node, + const fb_xnnpack::XNNGraph* graph) noexcept { + MAYBE_UNUSED(graph); + std::pair min_max = getOutputMinMax(node); auto graph_node = node->xnode_union_as_XNNAdd(); xnn_status status = xnn_define_add2( @@ -547,7 +553,10 @@ Define Minimum operator Node into the subgraph Error defineMinimumNode( xnn_subgraph_t subgraph_ptr, const std::unordered_map& remapped_ids, - const NodePtr node) noexcept { + const NodePtr node, + const fb_xnnpack::XNNGraph* graph) noexcept { + MAYBE_UNUSED(graph); + auto graph_node = node->xnode_union_as_XNNMinimum(); xnn_status status = xnn_define_minimum2( subgraph_ptr, @@ -572,7 +581,10 @@ Define subtract operator Node into the subgraph Error defineSubtractNode( xnn_subgraph_t subgraph_ptr, const std::unordered_map& remapped_ids, - const NodePtr node) noexcept { + const NodePtr node, + const fb_xnnpack::XNNGraph* graph) noexcept { + MAYBE_UNUSED(graph); + auto graph_node = node->xnode_union_as_XNNSubtract(); std::pair min_max = getOutputMinMax(node); xnn_status status = xnn_define_subtract( @@ -600,7 +612,10 @@ Define Multiply operator Node into the subgraph Error defineMultiplyNode( xnn_subgraph_t subgraph_ptr, const std::unordered_map& remapped_ids, - const NodePtr node) noexcept { + const NodePtr node, + const fb_xnnpack::XNNGraph* graph) noexcept { + MAYBE_UNUSED(graph); + auto graph_node = node->xnode_union_as_XNNMultiply(); std::pair min_max = getOutputMinMax(node); xnn_status status = xnn_define_multiply2( @@ -622,26 +637,83 @@ Error defineMultiplyNode( return Error::Ok; }; +#ifdef ENABLE_XNNPACK_KLEIDI +bool isQP8(const 
fb_xnnpack::XNNGraph* graph, const NodePtr node) { + assert(node->xnode_union_type() == fb_xnnpack::XNodeUnion::XNNConvert); + auto graph_node = node->xnode_union_as_XNNConvert(); + auto cvt_output_id = graph_node->output_id(); + + auto check_dtype = [graph](uint32_t id, DataType dtype) -> bool { + assert( + dtype == DataType::xnn_datatype_qdint8 || + dtype == DataType::xnn_datatype_qbint4); + for (auto value : *graph->xvalues()) { + if (value->xvalue_union_type() != + fb_xnnpack::XValueUnion::XNNQuantizedTensorValue) { + continue; + } + auto tensor = + value->xvalue_union_as_XNNQuantizedTensorValue()->tensor_value(); + if (tensor->id_out() == id) { + return tensor->datatype() == dtype; + } + } + return false; + }; + + // Check if the convert output tensor is qdint8, else bail early. + if (!check_dtype(cvt_output_id, DataType::xnn_datatype_qdint8)) { + return false; + } + + // Look for a fully-connected (linear) node that takes the convert output as + // its input and has a qbint4 filter. Assumption: if one such consumer exists, + // QP8 can be used for all the linear nodes consuming this convert output. 
+ for (auto node : *graph->xnodes()) { + if (node->xnode_union_type() == fb_xnnpack::XNodeUnion::XNNFullyConnected) { + auto linear_node = node->xnode_union_as_XNNFullyConnected(); + if (linear_node->input1_id() == cvt_output_id) { + if (check_dtype( + linear_node->filter_id(), DataType::xnn_datatype_qbint4)) { + return true; + } + } + } + } + return false; +} +#endif // ENABLE_XNNPACK_KLEIDI + /* Define Convert operator Node into the subgraph */ Error defineConvertNode( xnn_subgraph_t subgraph_ptr, const std::unordered_map& remapped_ids, - const NodePtr node) noexcept { + const NodePtr node, + const fb_xnnpack::XNNGraph* flatbuffer_graph) noexcept { + MAYBE_UNUSED(flatbuffer_graph); auto graph_node = node->xnode_union_as_XNNConvert(); + + int32_t flags = graph_node->flags(); +#ifdef ENABLE_XNNPACK_KLEIDI +// This is not currently exposed at include/xnnpack.h yet once it is +// we can remove this runtime logic and do this ahead-of-time +#define XNN_FLAG_MAYBE_PACK_FOR_QB4W_GEMM 0x00000100; + if (isQP8(flatbuffer_graph, node)) { + flags |= XNN_FLAG_MAYBE_PACK_FOR_QB4W_GEMM; + ET_LOG( + Debug, + "Setting XNN_FLAG_MAYBE_PACK_FOR_QB4W_GEMM flag for convert node %i", + node->debug_handle()); + } +#endif + xnn_status status = xnn_define_convert( subgraph_ptr, remapped_ids.at(graph_node->input_id()), remapped_ids.at(graph_node->output_id()), -#ifdef ENABLE_XNNPACK_KLEIDI - // This maps to XNNPACK's XNN_FLAG_MAYBE_PACK_FOR_QB4W_GEMM - // however this is not currently exposed at top level - // xnnpack.h Header - 0x00000100); -#else - graph_node->flags()); -#endif + flags); ET_CHECK_OR_RETURN_ERROR( status == xnn_status_success, @@ -660,7 +732,10 @@ when defining the tensor values Error defineFullyConnectedNode( xnn_subgraph_t subgraph_ptr, const std::unordered_map& remapped_ids, - const NodePtr node) noexcept { + const NodePtr node, + const fb_xnnpack::XNNGraph* graph) noexcept { + MAYBE_UNUSED(graph); + auto graph_node = node->xnode_union_as_XNNFullyConnected(); std::pair 
min_max = getOutputMinMax(node); xnn_status status = xnn_define_fully_connected( @@ -690,7 +765,10 @@ the tensor value Error defineClampNode( xnn_subgraph_t subgraph_ptr, const std::unordered_map& remapped_ids, - const NodePtr node) noexcept { + const NodePtr node, + const fb_xnnpack::XNNGraph* graph) noexcept { + MAYBE_UNUSED(graph); + std::pair min_max = getOutputMinMax(node); auto graph_node = node->xnode_union_as_XNNClamp(); xnn_status status = xnn_define_clamp( @@ -719,7 +797,10 @@ the tensor value Error defineSoftmaxNode( xnn_subgraph_t subgraph_ptr, const std::unordered_map& remapped_ids, - const NodePtr node) noexcept { + const NodePtr node, + const fb_xnnpack::XNNGraph* graph) noexcept { + MAYBE_UNUSED(graph); + auto graph_node = node->xnode_union_as_XNNSoftmax(); xnn_status status = xnn_define_softmax( subgraph_ptr, @@ -744,7 +825,10 @@ the tensor value Error defineSigmoidNode( xnn_subgraph_t subgraph_ptr, const std::unordered_map& remapped_ids, - const NodePtr node) noexcept { + const NodePtr node, + const fb_xnnpack::XNNGraph* graph) noexcept { + MAYBE_UNUSED(graph); + auto graph_node = node->xnode_union_as_XNNSigmoid(); xnn_status status = xnn_define_sigmoid( subgraph_ptr, @@ -769,7 +853,10 @@ the tensor value Error defineFloorNode( xnn_subgraph_t subgraph_ptr, const std::unordered_map& remapped_ids, - const NodePtr node) noexcept { + const NodePtr node, + const fb_xnnpack::XNNGraph* graph) noexcept { + MAYBE_UNUSED(graph); + auto graph_node = node->xnode_union_as_XNNFloor(); xnn_status status = xnn_define_floor( subgraph_ptr, @@ -789,7 +876,10 @@ Error defineFloorNode( Error defineGlobalAvgPooling2dNode( xnn_subgraph_t subgraph_ptr, const std::unordered_map& remapped_ids, - const NodePtr node) noexcept { + const NodePtr node, + const fb_xnnpack::XNNGraph* graph) noexcept { + MAYBE_UNUSED(graph); + auto graph_node = node->xnode_union_as_XNNGlobalAvgPooling2d(); std::pair min_max = getOutputMinMax(node); xnn_status status = 
xnn_define_global_average_pooling_2d( @@ -812,7 +902,10 @@ Error defineGlobalAvgPooling2dNode( Error defineAvgPooling2dNode( xnn_subgraph_t subgraph_ptr, const std::unordered_map& remapped_ids, - const NodePtr node) noexcept { + const NodePtr node, + const fb_xnnpack::XNNGraph* graph) noexcept { + MAYBE_UNUSED(graph); + auto graph_node = node->xnode_union_as_XNNAvgPooling2d(); std::pair min_max = getOutputMinMax(node); xnn_status status = xnn_define_average_pooling_2d( @@ -848,7 +941,10 @@ tensor value Error defineConv2dNode( xnn_subgraph_t subgraph_ptr, const std::unordered_map& remapped_ids, - const NodePtr node) noexcept { + const NodePtr node, + const fb_xnnpack::XNNGraph* graph) noexcept { + MAYBE_UNUSED(graph); + auto graph_node = node->xnode_union_as_XNNConv2d(); std::pair min_max = getOutputMinMax(node); xnn_status status = xnn_define_convolution_2d( @@ -891,7 +987,10 @@ tensor value Error defineMaxPooling2dNode( xnn_subgraph_t subgraph_ptr, const std::unordered_map& remapped_ids, - const NodePtr node) noexcept { + const NodePtr node, + const fb_xnnpack::XNNGraph* graph) noexcept { + MAYBE_UNUSED(graph); + auto graph_node = node->xnode_union_as_XNNMaxPooling2d(); std::pair min_max = getOutputMinMax(node); xnn_status status = xnn_define_max_pooling_2d( @@ -928,7 +1027,10 @@ Define serialized div node into the subgraph Error defineDivNode( xnn_subgraph_t subgraph_ptr, const std::unordered_map& remapped_ids, - const NodePtr node) noexcept { + const NodePtr node, + const fb_xnnpack::XNNGraph* graph) noexcept { + MAYBE_UNUSED(graph); + auto graph_node = node->xnode_union_as_XNNDiv(); std::pair min_max = getOutputMinMax(node); xnn_status status = xnn_define_divide( @@ -957,7 +1059,10 @@ tensor value Error defineStaticTransposeNode( xnn_subgraph_t subgraph_ptr, const std::unordered_map& remapped_ids, - const NodePtr node) noexcept { + const NodePtr node, + const fb_xnnpack::XNNGraph* graph) noexcept { + MAYBE_UNUSED(graph); + auto graph_node = 
node->xnode_union_as_XNNStaticTranspose(); // Get tensor dims, we need to convert the uint32_t* to size_t* @@ -987,7 +1092,10 @@ the tensor value Error defineStaticResizeBilinear2DNode( xnn_subgraph_t subgraph_ptr, const std::unordered_map& remapped_ids, - const NodePtr node) noexcept { + const NodePtr node, + const fb_xnnpack::XNNGraph* graph) noexcept { + MAYBE_UNUSED(graph); + const fb_xnnpack::XNNStaticResizeBilinear2D* graph_node = node->xnode_union_as_XNNStaticResizeBilinear2D(); @@ -1016,7 +1124,10 @@ the tensor value Error defineStaticConstantPadNode( xnn_subgraph_t subgraph_ptr, const std::unordered_map& remapped_ids, - const NodePtr node) noexcept { + const NodePtr node, + const fb_xnnpack::XNNGraph* graph) noexcept { + MAYBE_UNUSED(graph); + const fb_xnnpack::XNNStaticConstantPad* graph_node = node->xnode_union_as_XNNStaticConstantPad(); @@ -1051,7 +1162,10 @@ tensor value Error defineDepthwiseConv2dNode( xnn_subgraph_t subgraph_ptr, const std::unordered_map& remapped_ids, - const NodePtr node) noexcept { + const NodePtr node, + const fb_xnnpack::XNNGraph* graph) noexcept { + MAYBE_UNUSED(graph); + auto graph_node = node->xnode_union_as_XNNDepthwiseConv2d(); std::pair min_max = getOutputMinMax(node); xnn_status status = xnn_define_depthwise_convolution_2d( @@ -1090,7 +1204,10 @@ Error defineDepthwiseConv2dNode( Error defineStaticReshapeNode( xnn_subgraph_t subgraph_ptr, const std::unordered_map& remapped_ids, - const NodePtr node) noexcept { + const NodePtr node, + const fb_xnnpack::XNNGraph* graph) noexcept { + MAYBE_UNUSED(graph); + auto graph_node = node->xnode_union_as_XNNStaticReshape(); // Get tensor dims, we need to convert the uint32_t* to size_t* @@ -1121,7 +1238,10 @@ tensor value Error defineArgMaxPooling2dNode( xnn_subgraph_t subgraph_ptr, const std::unordered_map& remapped_ids, - const NodePtr node) noexcept { + const NodePtr node, + const fb_xnnpack::XNNGraph* graph) noexcept { + MAYBE_UNUSED(graph); + auto graph_node = 
node->xnode_union_as_XNNArgMaxPooling2d(); xnn_status status = xnn_define_argmax_pooling_2d( @@ -1155,7 +1275,10 @@ tensor value Error defineSquareRootNode( xnn_subgraph_t subgraph_ptr, const std::unordered_map& remapped_ids, - const NodePtr node) noexcept { + const NodePtr node, + const fb_xnnpack::XNNGraph* graph) noexcept { + MAYBE_UNUSED(graph); + auto graph_node = node->xnode_union_as_XNNSquareRoot(); xnn_status status = xnn_define_square_root( @@ -1182,7 +1305,10 @@ tensor value Error defineCeilingNode( xnn_subgraph_t subgraph_ptr, const std::unordered_map& remapped_ids, - const NodePtr node) noexcept { + const NodePtr node, + const fb_xnnpack::XNNGraph* graph) noexcept { + MAYBE_UNUSED(graph); + auto graph_node = node->xnode_union_as_XNNCeiling(); xnn_status status = xnn_define_ceiling( @@ -1209,7 +1335,10 @@ tensor value Error defineHardswishNode( xnn_subgraph_t subgraph_ptr, const std::unordered_map& remapped_ids, - const NodePtr node) noexcept { + const NodePtr node, + const fb_xnnpack::XNNGraph* graph) noexcept { + MAYBE_UNUSED(graph); + auto graph_node = node->xnode_union_as_XNNHardswish(); xnn_status status = xnn_define_hardswish( @@ -1236,7 +1365,10 @@ tensor value Error defineLeakyReLUNode( xnn_subgraph_t subgraph_ptr, const std::unordered_map& remapped_ids, - const NodePtr node) noexcept { + const NodePtr node, + const fb_xnnpack::XNNGraph* graph) noexcept { + MAYBE_UNUSED(graph); + auto graph_node = node->xnode_union_as_XNNLeakyReLU(); xnn_status status = xnn_define_leaky_relu( @@ -1264,7 +1396,10 @@ tensor value Error defineMaximumNode( xnn_subgraph_t subgraph_ptr, const std::unordered_map& remapped_ids, - const NodePtr node) noexcept { + const NodePtr node, + const fb_xnnpack::XNNGraph* graph) noexcept { + MAYBE_UNUSED(graph); + auto graph_node = node->xnode_union_as_XNNMaximum(); xnn_status status = xnn_define_maximum2( @@ -1291,7 +1426,10 @@ serialized ids, to the new ids generated when defining the tensor value Error defineNegateNode( 
xnn_subgraph_t subgraph_ptr, const std::unordered_map& remapped_ids, - const NodePtr node) noexcept { + const NodePtr node, + const fb_xnnpack::XNNGraph* graph) noexcept { + MAYBE_UNUSED(graph); + auto graph_node = node->xnode_union_as_XNNNegate(); xnn_status status = xnn_define_negate( @@ -1317,7 +1455,10 @@ serialized ids to the new ids generated when defining the tensor value Error defineSquareNode( xnn_subgraph_t subgraph_ptr, const std::unordered_map& remapped_ids, - const NodePtr node) noexcept { + const NodePtr node, + const fb_xnnpack::XNNGraph* graph) noexcept { + MAYBE_UNUSED(graph); + auto graph_node = node->xnode_union_as_XNNSquare(); xnn_status status = xnn_define_square( @@ -1343,7 +1484,10 @@ serialized ids to the new ids generated when defining the tensor value Error defineELUNode( xnn_subgraph_t subgraph_ptr, const std::unordered_map& remapped_ids, - const NodePtr node) noexcept { + const NodePtr node, + const fb_xnnpack::XNNGraph* graph) noexcept { + MAYBE_UNUSED(graph); + auto graph_node = node->xnode_union_as_XNNELU(); xnn_status status = xnn_define_elu( @@ -1370,7 +1514,10 @@ serialized ids to the new ids generated when defining the tensor value Error defineAbsNode( xnn_subgraph_t subgraph_ptr, const std::unordered_map& remapped_ids, - const NodePtr node) noexcept { + const NodePtr node, + const fb_xnnpack::XNNGraph* graph) noexcept { + MAYBE_UNUSED(graph); + auto graph_node = node->xnode_union_as_XNNAbs(); xnn_status status = xnn_define_abs( @@ -1397,7 +1544,10 @@ to the new ids generated when defining the tensor value Error definePReLUNode( xnn_subgraph_t subgraph_ptr, const std::unordered_map& remapped_ids, - const NodePtr node) noexcept { + const NodePtr node, + const fb_xnnpack::XNNGraph* graph) noexcept { + MAYBE_UNUSED(graph); + auto graph_node = node->xnode_union_as_XNNPReLU(); xnn_status status = xnn_define_prelu( @@ -1425,7 +1575,10 @@ to the new ids generated when defining the tensor value Error defineConcatenate2Node( xnn_subgraph_t 
subgraph_ptr, const std::unordered_map& remapped_ids, - const NodePtr node) noexcept { + const NodePtr node, + const fb_xnnpack::XNNGraph* graph) noexcept { + MAYBE_UNUSED(graph); + auto graph_node = node->xnode_union_as_XNNConcatenate2(); xnn_status status = xnn_define_concatenate2( @@ -1454,7 +1607,10 @@ to the new ids generated when defining the tensor value Error defineConcatenate3Node( xnn_subgraph_t subgraph_ptr, const std::unordered_map& remapped_ids, - const NodePtr node) noexcept { + const NodePtr node, + const fb_xnnpack::XNNGraph* graph) noexcept { + MAYBE_UNUSED(graph); + auto graph_node = node->xnode_union_as_XNNConcatenate3(); xnn_status status = xnn_define_concatenate3( @@ -1484,7 +1640,10 @@ to the new ids generated when defining the tensor value Error defineConcatenate4Node( xnn_subgraph_t subgraph_ptr, const std::unordered_map& remapped_ids, - const NodePtr node) noexcept { + const NodePtr node, + const fb_xnnpack::XNNGraph* graph) noexcept { + MAYBE_UNUSED(graph); + auto graph_node = node->xnode_union_as_XNNConcatenate4(); xnn_status status = xnn_define_concatenate4( @@ -1515,7 +1674,10 @@ to the new ids generated when defining the tensor value Error defineStaticSliceNode( xnn_subgraph_t subgraph_ptr, const std::unordered_map& remapped_ids, - const NodePtr node) noexcept { + const NodePtr node, + const fb_xnnpack::XNNGraph* graph) noexcept { + MAYBE_UNUSED(graph); + auto graph_node = node->xnode_union_as_XNNStaticSlice(); std::vector offsets = flatbufferDimsToVector(graph_node->offsets()); @@ -1548,7 +1710,10 @@ to the new ids generated when defining the tensor value Error defineScaledDotProductAttentionNode( xnn_subgraph_t subgraph_ptr, const std::unordered_map& remapped_ids, - const NodePtr node) noexcept { + const NodePtr node, + const fb_xnnpack::XNNGraph* graph) noexcept { + MAYBE_UNUSED(graph); + auto graph_node = node->xnode_union_as_XNNScaledDotProductAttention(); xnn_status status = xnn_define_scaled_dot_product_attention( @@ -1581,7 
+1746,10 @@ to the new ids generated when defining the tensor value Error defineBatchMatrixMultiplyNode( xnn_subgraph_t subgraph_ptr, const std::unordered_map& remapped_ids, - const NodePtr node) noexcept { + const NodePtr node, + const fb_xnnpack::XNNGraph* graph) noexcept { + MAYBE_UNUSED(graph); + auto graph_node = node->xnode_union_as_XNNBatchMatrixMultiply(); xnn_status status = xnn_define_batch_matrix_multiply( @@ -1609,7 +1777,10 @@ that has not yet been implemented Error defineNotImplementedNode( xnn_subgraph_t subgraph_ptr, const std::unordered_map& remapped_ids, - const NodePtr node) noexcept { + const NodePtr node, + const fb_xnnpack::XNNGraph* graph) noexcept { + MAYBE_UNUSED(graph); + ET_CHECK_OR_RETURN_ERROR( false, NotImplemented, @@ -1767,7 +1938,7 @@ ET_NODISCARD Error XNNCompiler::compileModel( for (auto node : *flatbuffer_graph->xnodes()) { err = getDefineNodeFunc(node->xnode_union_type())( - subgraph.get(), remapped_ids, node); + subgraph.get(), remapped_ids, node, flatbuffer_graph); if (err != Error::Ok) { return err; } From cc7c9369e5e7f475e28abbe7ce59ac09461fab50 Mon Sep 17 00:00:00 2001 From: Digant Desai Date: Tue, 15 Oct 2024 07:51:23 -0700 Subject: [PATCH 2/2] [xnnpack] Update cmake help message --- backends/xnnpack/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backends/xnnpack/CMakeLists.txt b/backends/xnnpack/CMakeLists.txt index 5d4eb2c7bbb..002d351155d 100644 --- a/backends/xnnpack/CMakeLists.txt +++ b/backends/xnnpack/CMakeLists.txt @@ -39,7 +39,7 @@ option(EXECUTORCH_XNNPACK_SHARED_WORKSPACE # Keeping this OFF by default due to regressions in decode # and model load with kleidi kernels option(EXECUTORCH_XNNPACK_ENABLE_KLEIDI - "Enable workspace sharing across different delegate instances" OFF) + "Enable Arm Kleidi kernels" OFF) if(EXECUTORCH_XNNPACK_SHARED_WORKSPACE) add_definitions(-DENABLE_XNNPACK_SHARED_WORKSPACE) endif()