Add FullyConnected ACL executor
allnes committed Jun 21, 2024
1 parent 6a7c442 commit 3a13983
Showing 12 changed files with 538 additions and 118 deletions.
121 changes: 19 additions & 102 deletions src/plugins/intel_cpu/src/nodes/executors/acl/acl_eltwise.cpp
@@ -361,66 +361,6 @@ bool AclEltwiseExecutor::init(const EltwiseAttrs &eltwiseAttrs, const std::vecto
return acl_op;
};
break;
case Algorithm::EltwiseRelu:
if (aclEltwiseAttrs.alpha == 0) {
if (!NEActivationLayer::validate(&srcTensorsInfo[0], &dstTensorsInfo[0],
ActivationLayerInfo::ActivationFunction::RELU))
return false;
} else {
if (!NEActivationLayer::validate(&srcTensorsInfo[0], &dstTensorsInfo[0],
{ActivationLayerInfo::ActivationFunction::LEAKY_RELU, aclEltwiseAttrs.alpha}))
return false;
}
exec_func = [this]() -> std::unique_ptr<IFunction> {
auto acl_op = std::make_unique<NEActivationLayer>();
if (aclEltwiseAttrs.alpha == 0) {
acl_op->configure(&srcTensors[0], &dstTensors[0], ActivationLayerInfo::ActivationFunction::RELU);
} else {
acl_op->configure(&srcTensors[0], &dstTensors[0],
{ActivationLayerInfo::ActivationFunction::LEAKY_RELU, aclEltwiseAttrs.alpha});
}
return acl_op;
};
break;
case Algorithm::EltwiseGeluErf:
if (!NEActivationLayer::validate(&srcTensorsInfo[0], &dstTensorsInfo[0], ActivationLayerInfo::ActivationFunction::GELU))
return false;
exec_func = [this]() -> std::unique_ptr<IFunction> {
auto acl_op = std::make_unique<NEActivationLayer>();
acl_op->configure(&srcTensors[0], &dstTensors[0], ActivationLayerInfo::ActivationFunction::GELU);
return acl_op;
};
break;
case Algorithm::EltwiseElu:
if (!NEActivationLayer::validate(&srcTensorsInfo[0], &dstTensorsInfo[0],
{ActivationLayerInfo::ActivationFunction::ELU, aclEltwiseAttrs.alpha}))
return false;
exec_func = [this]() -> std::unique_ptr<IFunction> {
auto acl_op = std::make_unique<NEActivationLayer>();
acl_op->configure(&srcTensors[0], &dstTensors[0], {ActivationLayerInfo::ActivationFunction::ELU, aclEltwiseAttrs.alpha});
return acl_op;
};
break;
case Algorithm::EltwiseTanh:
if (!NEActivationLayer::validate(&srcTensorsInfo[0], &dstTensorsInfo[0],
{ActivationLayerInfo::ActivationFunction::TANH, 1.f, 1.f}))
return false;
exec_func = [this]() -> std::unique_ptr<IFunction> {
auto acl_op = std::make_unique<NEActivationLayer>();
acl_op->configure(&srcTensors[0], &dstTensors[0],
{ActivationLayerInfo::ActivationFunction::TANH, 1.f, 1.f});
return acl_op;
};
break;
case Algorithm::EltwiseSigmoid:
if (!NEActivationLayer::validate(&srcTensorsInfo[0], &dstTensorsInfo[0], ActivationLayerInfo::ActivationFunction::LOGISTIC))
return false;
exec_func = [this]() -> std::unique_ptr<IFunction> {
auto acl_op = std::make_unique<NEActivationLayer>();
acl_op->configure(&srcTensors[0], &dstTensors[0], ActivationLayerInfo::ActivationFunction::LOGISTIC);
return acl_op;
};
break;
case Algorithm::EltwiseAbs:
if (!NEAbsLayer::validate(&srcTensorsInfo[0], &dstTensorsInfo[0]))
return false;
@@ -430,24 +370,6 @@ bool AclEltwiseExecutor::init(const EltwiseAttrs &eltwiseAttrs, const std::vecto
return acl_op;
};
break;
case Algorithm::EltwiseSqrt:
if (!NEActivationLayer::validate(&srcTensorsInfo[0], &dstTensorsInfo[0], ActivationLayerInfo::ActivationFunction::SQRT))
return false;
exec_func = [this]() -> std::unique_ptr<IFunction> {
auto acl_op = std::make_unique<NEActivationLayer>();
acl_op->configure(&srcTensors[0], &dstTensors[0], ActivationLayerInfo::ActivationFunction::SQRT);
return acl_op;
};
break;
case Algorithm::EltwiseSoftRelu:
if (!NEActivationLayer::validate(&srcTensorsInfo[0], &dstTensorsInfo[0], ActivationLayerInfo::ActivationFunction::SOFT_RELU))
return false;
exec_func = [this]() -> std::unique_ptr<IFunction> {
auto acl_op = std::make_unique<NEActivationLayer>();
acl_op->configure(&srcTensors[0], &dstTensors[0], ActivationLayerInfo::ActivationFunction::SOFT_RELU);
return acl_op;
};
break;
case Algorithm::EltwiseExp:
if (!NEExpLayer::validate(&srcTensorsInfo[0], &dstTensorsInfo[0]))
return false;
@@ -457,28 +379,6 @@ bool AclEltwiseExecutor::init(const EltwiseAttrs &eltwiseAttrs, const std::vecto
return acl_op;
};
break;
case Algorithm::EltwiseClamp:
if (!NEActivationLayer::validate(&srcTensorsInfo[0], &dstTensorsInfo[0],
{ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, aclEltwiseAttrs.beta, aclEltwiseAttrs.alpha}))
return false;
exec_func = [this]() -> std::unique_ptr<IFunction> {
auto acl_op = std::make_unique<NEActivationLayer>();
acl_op->configure(&srcTensors[0], &dstTensors[0],
{ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, aclEltwiseAttrs.beta, aclEltwiseAttrs.alpha});
return acl_op;
};
break;
case Algorithm::EltwiseSwish:
if (!NEActivationLayer::validate(&srcTensorsInfo[0], &dstTensorsInfo[0],
{ActivationLayerInfo::ActivationFunction::SWISH, aclEltwiseAttrs.alpha}))
return false;
exec_func = [this]() -> std::unique_ptr<IFunction> {
auto acl_op = std::make_unique<NEActivationLayer>();
acl_op->configure(&srcTensors[0], &dstTensors[0],
{ActivationLayerInfo::ActivationFunction::SWISH, aclEltwiseAttrs.alpha});
return acl_op;
};
break;
case Algorithm::EltwisePrelu:
if (!NEPReluLayer::validate(&srcTensorsInfo[0], &srcTensorsInfo[1], &dstTensorsInfo[0]))
return false;
@@ -488,12 +388,29 @@ bool AclEltwiseExecutor::init(const EltwiseAttrs &eltwiseAttrs, const std::vecto
return acl_op;
};
break;
case Algorithm::EltwiseRelu:
case Algorithm::EltwiseGeluErf:
case Algorithm::EltwiseElu:
case Algorithm::EltwiseTanh:
case Algorithm::EltwiseSigmoid:
case Algorithm::EltwiseSqrt:
case Algorithm::EltwiseSoftRelu:
case Algorithm::EltwiseClamp:
case Algorithm::EltwiseSwish:
case Algorithm::EltwiseHswish:
if (!NEActivationLayer::validate(&srcTensorsInfo[0], &dstTensorsInfo[0],
getActivationLayerInfo(aclEltwiseAttrs.algorithm,
aclEltwiseAttrs.alpha,
aclEltwiseAttrs.beta,
aclEltwiseAttrs.gamma)))
return false;
exec_func = [this]() -> std::unique_ptr<IFunction> {
auto acl_op = std::make_unique<NEActivationLayer>();
acl_op->configure(&srcTensors[0], &dstTensors[0],
getActivationLayerInfo(aclEltwiseAttrs.algorithm,
aclEltwiseAttrs.alpha,
aclEltwiseAttrs.beta,
aclEltwiseAttrs.gamma));
return acl_op;
};
break;
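Note: the nine activation branches removed above differed only in the ActivationLayerInfo they handed to NEActivationLayer, so the new fall-through case funnels all of them into one validate/configure pair built by a helper. The helper itself is not part of this hunk; below is a hedged sketch of such a mapping, reconstructed from the deleted cases (the gamma parameter is accepted for signature parity but unused by these mappings):

// Sketch only: reconstructed from the deleted cases above, not the actual
// helper shipped in acl_utils. Assumes ACL's Types.h and OpenVINO's
// Algorithm enum are in scope.
inline arm_compute::ActivationLayerInfo getActivationLayerInfo(Algorithm algorithm,
                                                               float alpha,
                                                               float beta,
                                                               float gamma) {
    using AF = arm_compute::ActivationLayerInfo::ActivationFunction;
    (void)gamma;  // none of the mappings below consume gamma
    switch (algorithm) {
    case Algorithm::EltwiseRelu:
        // alpha == 0 selects plain ReLU; otherwise leaky ReLU with slope alpha
        return alpha == 0 ? arm_compute::ActivationLayerInfo{AF::RELU}
                          : arm_compute::ActivationLayerInfo{AF::LEAKY_RELU, alpha};
    case Algorithm::EltwiseGeluErf:  return {AF::GELU};
    case Algorithm::EltwiseElu:      return {AF::ELU, alpha};
    case Algorithm::EltwiseTanh:     return {AF::TANH, 1.f, 1.f};
    case Algorithm::EltwiseSigmoid:  return {AF::LOGISTIC};
    case Algorithm::EltwiseSqrt:     return {AF::SQRT};
    case Algorithm::EltwiseSoftRelu: return {AF::SOFT_RELU};
    case Algorithm::EltwiseClamp:    return {AF::LU_BOUNDED_RELU, beta, alpha};  // upper, lower
    case Algorithm::EltwiseSwish:    return {AF::SWISH, alpha};
    case Algorithm::EltwiseHswish:   return {AF::HARD_SWISH};
    default:
        OPENVINO_THROW("Unsupported algorithm for ACL activation mapping");
    }
}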
81 changes: 81 additions & 0 deletions src/plugins/intel_cpu/src/nodes/executors/acl/acl_executor.cpp
@@ -0,0 +1,81 @@
// Copyright (C) 2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "acl_executor.hpp"
#include "acl_utils.hpp"
#include "nodes/executors/memory_arguments.hpp"
#include "utils/debug_capabilities.h"

namespace ov {
namespace intel_cpu {

ACLMemoryInfo ACLCommonExecutor::initTensorInfo(const MemoryPtr& memoryPtr, ACLTensorAttrs attrs) {
auto acl_tensor_type = precisionToAclDataType(memoryPtr->getPrecision());
auto acl_tensor_layout = getAclDataLayoutByMemoryDesc(memoryPtr->getDescPtr());

ACLMemoryInfo aclMemoryInfo = nullptr;
if (acl_tensor_type != arm_compute::DataType::UNKNOWN) {
auto collapsed_dims = collapse_dims_to_max_rank(memoryPtr->getStaticDims(), attrs.maxDimsShape);
auto acl_tensor_shape = shapeCast(collapsed_dims);
if (attrs.hasLayoutTypeNHWC) {
changeLayoutToNH_C({&acl_tensor_shape});
}
aclMemoryInfo = std::make_shared<arm_compute::TensorInfo>(
acl_tensor_shape, 1,
acl_tensor_type,
acl_tensor_layout);
}
return aclMemoryInfo;
}

ACLMemory ACLCommonExecutor::initTensor(const ACLMemoryInfo& aclMemoryInfo) {
ACLMemory aclMemory = nullptr;
if (aclMemoryInfo) {
aclMemory = std::make_shared<arm_compute::Tensor>();
aclMemory->allocator()->init(*aclMemoryInfo);
}
return aclMemory;
}

bool ACLCommonExecutor::update(const MemoryArgs &memory) {
for (auto& cpu_mem_ptr : memory) {
// Initialize arm_compute::TensorInfo object
auto aclTensorInfo = initTensorInfo(cpu_mem_ptr.second, aclTensorAttrs);
// Initialize arm_compute::Tensor object
aclMemoryMap[cpu_mem_ptr.first] = initTensor(aclTensorInfo);
}

// Update arm_compute::TensorInfo objects for specific ACL function
auto tensorsInfoValidateStatus = updateTensorsInfo(aclMemoryMap);
if (!tensorsInfoValidateStatus) {
DEBUG_LOG("ACL operator validation failed: ", tensorsInfoValidateStatus.error_description());
return false;
}

// Configure arm_compute::IFunction object
configureThreadSafe([&] {
iFunction = configureFunction(aclMemoryMap);
});
return true;
}

void ACLCommonExecutor::execute(const MemoryArgs &memory) {
for (auto& acl_tensor : aclMemoryMap) {
if (acl_tensor.second) {
acl_tensor.second->allocator()->import_memory(memory.at(acl_tensor.first)->getData());
}
}
iFunction->run();
}

ACLCommonExecutor::~ACLCommonExecutor() {
for (auto& acl_tensor : aclMemoryMap) {
if (acl_tensor.second) {
acl_tensor.second->allocator()->free();
}
}
}

} // namespace intel_cpu
} // namespace ov
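Taken together, update() and execute() define the executor lifecycle: update() rebuilds the TensorInfo/Tensor pairs for the current shapes, lets the derived class validate them and configure the ACL function, and execute() imports the CPU buffers into the ACL tensors before running it. A minimal usage sketch under assumed names (srcMem, weiMem, biasMem, dstMem, attrs, postOps and context are hypothetical placeholders, not taken from this PR):

// Hedged usage sketch of the lifecycle; all variables are placeholders for
// objects the CPU plugin node provides.
MemoryArgs memory{{ARG_SRC, srcMem}, {ARG_WEI, weiMem}, {ARG_BIAS, biasMem}, {ARG_DST, dstMem}};
auto executor = std::make_shared<ACLFullyConnectedExecutor>(attrs, postOps, memory, context);
if (executor->update(memory)) {   // validate shapes and configure the arm_compute::IFunction
    executor->execute(memory);    // import_memory() per tensor, then IFunction::run()
}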
52 changes: 52 additions & 0 deletions src/plugins/intel_cpu/src/nodes/executors/acl/acl_executor.hpp
@@ -0,0 +1,52 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "cpu_memory.h"
#include "nodes/executors/executor.hpp"
#include "arm_compute/runtime/NEON/NEFunctions.h"

namespace ov {
namespace intel_cpu {

using ACLMemoryInfo = std::shared_ptr<arm_compute::TensorInfo>;
using ACLMemory = std::shared_ptr<arm_compute::Tensor>;
using ACLMemoryMap = std::unordered_map<int, ACLMemory>;
using ACLFunction = std::unique_ptr<arm_compute::IFunction>;

struct ACLTensorAttrs {
bool hasLayoutTypeNHWC = false;
size_t maxDimsShape = arm_compute::MAX_DIMS;
};

class ACLCommonExecutor : public Executor {
public:
virtual arm_compute::Status updateTensorsInfo(const ACLMemoryMap& acl_memory) {
OPENVINO_THROW_NOT_IMPLEMENTED("This version of the 'updateTensorsInfo' method is not implemented by executor");
}
virtual ACLFunction configureFunction(const ACLMemoryMap& acl_memory) {
OPENVINO_THROW_NOT_IMPLEMENTED("This version of the 'configureFunction' method is not implemented by executor");
}
impl_desc_type implType() const override {
return impl_desc_type::acl;
}
void execute(const MemoryArgs& memory) final;
bool update(const MemoryArgs& memory) final;
~ACLCommonExecutor();

protected:
ACLTensorAttrs aclTensorAttrs;

private:
ACLMemoryMap aclMemoryMap;
ACLFunction iFunction = nullptr;
static ACLMemoryInfo initTensorInfo(const MemoryPtr& memoryPtr, ACLTensorAttrs attrs);
static ACLMemory initTensor(const ACLMemoryInfo& aclMemoryInfo);
};

using ACLCommonExecutorPtr = std::shared_ptr<ACLCommonExecutor>;

} // namespace intel_cpu
} // namespace ov
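A concrete executor only has to override the two hooks: updateTensorsInfo() to validate the prepared TensorInfo set and configureFunction() to build the IFunction. A hypothetical minimal subclass for illustration (MyReluExecutor is not part of the commit; it assumes ARG_SRC/ARG_DST from memory_arguments.hpp):

// Illustrative sketch only: a ReLU executor on top of ACLCommonExecutor.
class MyReluExecutor : public ACLCommonExecutor {
public:
    arm_compute::Status updateTensorsInfo(const ACLMemoryMap& acl_memory) override {
        return arm_compute::NEActivationLayer::validate(
            acl_memory.at(ARG_SRC)->info(),
            acl_memory.at(ARG_DST)->info(),
            arm_compute::ActivationLayerInfo(
                arm_compute::ActivationLayerInfo::ActivationFunction::RELU));
    }
    ACLFunction configureFunction(const ACLMemoryMap& acl_memory) override {
        auto relu = std::make_unique<arm_compute::NEActivationLayer>();
        relu->configure(acl_memory.at(ARG_SRC).get(),
                        acl_memory.at(ARG_DST).get(),
                        arm_compute::ActivationLayerInfo(
                            arm_compute::ActivationLayerInfo::ActivationFunction::RELU));
        return relu;
    }
};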
92 changes: 92 additions & 0 deletions src/plugins/intel_cpu/src/nodes/executors/acl/acl_fullyconnected.cpp
@@ -0,0 +1,92 @@
// Copyright (C) 2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "acl_fullyconnected.hpp"
#include "acl_utils.hpp"
#include "nodes/executors/executor.hpp"
#include "nodes/executors/memory_arguments.hpp"
#include "utils/debug_capabilities.h"
#include "nodes/executors/debug_messages.hpp"
#include "nodes/executors/implementation_utils.hpp"

namespace ov {
namespace intel_cpu {

ACLFullyConnectedExecutor::ACLFullyConnectedExecutor(const FCAttrs &attrs, const PostOps &postOps,
const MemoryArgs &memory,
const ExecutorContext::CPtr context) {
aclTensorAttrs.hasLayoutTypeNHWC = memory.at(ARG_SRC)->getDescPtr()->hasLayoutType(LayoutType::nspc);
fullyConnectedLayerInfo.weights_trained_layout = getAclDataLayoutByMemoryDesc(memory.at(ARG_WEI)->getDescPtr());
fullyConnectedLayerInfo.transpose_weights = !attrs.weightsNonTransposed;
if (memory.at(ARG_SRC)->getPrecision() == ov::element::f16) {
fullyConnectedLayerInfo.fp_mixed_precision = true;
}

// Add postops
if (postOps.size() == 1) {
if (const auto activation = std::dynamic_pointer_cast<ActivationPostOp>(postOps[0])) {
fullyConnectedLayerInfo.activation_info = getActivationLayerInfo(convertToEltwiseAlgorithm(activation->type()),
activation->alpha(),
activation->beta(),
activation->gamma());
}
}
}

bool ACLFullyConnectedExecutor::supports(const FCConfig &config) {
VERIFY(one_of(srcType(config), ov::element::f16, ov::element::f32), UNSUPPORTED_SRC_PRECISIONS);
VERIFY(postOpsNumbers(config) < 2, UNSUPPORTED_NUMBER_OF_POSTOPS);
VERIFY(one_of(srcRank(config), 2U, 3U, 4U), UNSUPPORTED_SRC_RANK);
VERIFY(one_of(weiRank(config), 2U, 3U), UNSUPPORTED_SRC_RANK);
return true;
}

arm_compute::Status ACLFullyConnectedExecutor::updateTensorsInfo(const ACLMemoryMap& acl_memory) {
auto wei_shape = acl_memory.at(ARG_WEI)->info()->tensor_shape();
if (wei_shape.num_dimensions() == 3U) {
acl_memory.at(ARG_WEI)->info()->set_tensor_shape({wei_shape[0] * wei_shape[1], wei_shape[2]});
}

auto src_shape = acl_memory.at(ARG_SRC)->info()->tensor_shape();
if (one_of(src_shape.num_dimensions(), 3U, 4U)) {
acl_memory.at(ARG_SRC)->info()->set_tensor_shape({
acl_memory.at(ARG_WEI)->info()->tensor_shape()[0],
src_shape.total_size() / acl_memory.at(ARG_WEI)->info()->tensor_shape()[0]});
}

if (one_of(acl_memory.at(ARG_DST)->info()->tensor_shape().num_dimensions(), 3U, 4U)) {
acl_memory.at(ARG_DST)->info()->set_tensor_shape({
acl_memory.at(ARG_WEI)->info()->tensor_shape()[1],
acl_memory.at(ARG_SRC)->info()->tensor_shape()[1]});
}

if (!fullyConnectedLayerInfo.transpose_weights) {
arm_compute::TensorShape temp_weights_shape = acl_memory.at(ARG_WEI)->info()->tensor_shape();
std::swap(temp_weights_shape[0], temp_weights_shape[1]);
acl_memory.at(ARG_WEI)->info()->set_tensor_shape(temp_weights_shape);
}

return arm_compute::NEFullyConnectedLayer::validate(
acl_memory.at(ARG_SRC)->info(),
acl_memory.at(ARG_WEI)->info(),
acl_memory.at(ARG_BIAS) ? acl_memory.at(ARG_BIAS)->info() : nullptr,
acl_memory.at(ARG_DST)->info(),
fullyConnectedLayerInfo,
weightsInfo);
}

ACLFunction ACLFullyConnectedExecutor::configureFunction(const ACLMemoryMap& acl_memory) {
auto neFC = std::make_unique<arm_compute::NEFullyConnectedLayer>();
neFC->configure(
acl_memory.at(ARG_SRC).get(),
acl_memory.at(ARG_WEI).get(),
acl_memory.at(ARG_BIAS).get(),
acl_memory.at(ARG_DST).get(),
fullyConnectedLayerInfo,
weightsInfo);
return neFC;
}

} // namespace intel_cpu
} // namespace ov
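The reshaping in updateTensorsInfo() flattens 3-D/4-D cases into the 2-D problem NEFullyConnectedLayer expects. A worked example in comments, under the assumption that shapeCast yields ACL's reversed dimension order (index 0 is the innermost axis); the concrete numbers are illustrative only:

// Assumed: src = [B=2, T=8, C=64], wei = [O=32, C=64], transpose_weights == true.
//  - wei is already 2-D: tensor_shape() == {64, 32}                  // {C, O}
//  - src is 3-D, so it collapses to {wei[0], total / wei[0]}
//    = {64, (2 * 8 * 64) / 64} = {64, 16}                            // {C, B*T}
//  - dst follows as {wei[1], src[1]} = {32, 16}                      // {O, B*T}
// i.e. NEFullyConnectedLayer::validate() sees a plain [16 x 64] * [64 x 32] GEMM.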