-
Notifications
You must be signed in to change notification settings - Fork 2.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
12 changed files
with
538 additions
and
118 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
81 changes: 81 additions & 0 deletions
81
src/plugins/intel_cpu/src/nodes/executors/acl/acl_executor.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
// Copyright (C) 2024 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#include "acl_executor.hpp" | ||
#include "acl_utils.hpp" | ||
#include "nodes/executors/memory_arguments.hpp" | ||
#include "utils/debug_capabilities.h" | ||
|
||
namespace ov { | ||
namespace intel_cpu { | ||
|
||
/**
 * @brief Builds an arm_compute::TensorInfo descriptor for a CPU plugin memory object.
 *
 * @param memoryPtr CPU memory whose precision, memory descriptor and static dims are read.
 * @param attrs     Supplies the maximum rank the dims are collapsed to and whether the
 *                  NHWC shape adjustment (changeLayoutToNH_C) is applied.
 * @return A shared TensorInfo, or nullptr when the precision maps to
 *         arm_compute::DataType::UNKNOWN.
 */
ACLMemoryInfo ACLCommonExecutor::initTensorInfo(const MemoryPtr& memoryPtr, ACLTensorAttrs attrs) {
    auto dataType = precisionToAclDataType(memoryPtr->getPrecision());
    auto dataLayout = getAclDataLayoutByMemoryDesc(memoryPtr->getDescPtr());

    // No ACL counterpart for this precision: report "no tensor info" to the caller.
    if (dataType == arm_compute::DataType::UNKNOWN) {
        return nullptr;
    }

    auto collapsedDims = collapse_dims_to_max_rank(memoryPtr->getStaticDims(), attrs.maxDimsShape);
    auto tensorShape = shapeCast(collapsedDims);
    if (attrs.hasLayoutTypeNHWC) {
        changeLayoutToNH_C({&tensorShape});
    }

    // The literal 1 is the number of channels passed to the TensorInfo constructor.
    return std::make_shared<arm_compute::TensorInfo>(tensorShape, 1, dataType, dataLayout);
}
|
||
/**
 * @brief Creates an arm_compute::Tensor whose allocator is initialized from the given info.
 *
 * @param aclMemoryInfo Tensor descriptor; may be null.
 * @return A new tensor, or nullptr when @p aclMemoryInfo is null.
 */
ACLMemory ACLCommonExecutor::initTensor(const ACLMemoryInfo& aclMemoryInfo) {
    if (!aclMemoryInfo) {
        return nullptr;
    }

    auto tensor = std::make_shared<arm_compute::Tensor>();
    tensor->allocator()->init(*aclMemoryInfo);
    return tensor;
}
|
||
bool ACLCommonExecutor::update(const MemoryArgs &memory) { | ||
for (auto& cpu_mem_ptr : memory) { | ||
// Initialize arm_compute::TensorInfo object | ||
auto aclTensorInfo = initTensorInfo(cpu_mem_ptr.second, aclTensorAttrs); | ||
// Initialize arm_compute::Tensor object | ||
aclMemoryMap[cpu_mem_ptr.first] = initTensor(aclTensorInfo); | ||
} | ||
|
||
// Update arm_compute::TensorInfo objects for specific ACL function | ||
auto tensorsInfoValidateStatus = updateTensorsInfo(aclMemoryMap); | ||
if (!tensorsInfoValidateStatus) { | ||
DEBUG_LOG("ACL operator validation was failed: ", tensorsInfoValidateStatus.error_description()); | ||
return false; | ||
} | ||
|
||
// Configure arm_compute::IFunction object | ||
configureThreadSafe([&] { | ||
iFunction = configureFunction(aclMemoryMap); | ||
}); | ||
return true; | ||
} | ||
|
||
void ACLCommonExecutor::execute(const MemoryArgs &memory) { | ||
for (auto& acl_tensor : aclMemoryMap) { | ||
if (acl_tensor.second) { | ||
acl_tensor.second->allocator()->import_memory(memory.at(acl_tensor.first)->getData()); | ||
} | ||
} | ||
iFunction->run(); | ||
} | ||
|
||
/**
 * @brief Calls free() on the allocator of every non-null tensor held in aclMemoryMap.
 */
ACLCommonExecutor::~ACLCommonExecutor() {
    for (auto& entry : aclMemoryMap) {
        auto& tensor = entry.second;
        if (!tensor) {
            continue;
        }
        tensor->allocator()->free();
    }
}
|
||
} // namespace intel_cpu | ||
} // namespace ov |
52 changes: 52 additions & 0 deletions
52
src/plugins/intel_cpu/src/nodes/executors/acl/acl_executor.hpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
// Copyright (C) 2018-2024 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#pragma once | ||
|
||
#include "cpu_memory.h" | ||
#include "nodes/executors/executor.hpp" | ||
#include "arm_compute/runtime/NEON/NEFunctions.h" | ||
|
||
namespace ov { | ||
namespace intel_cpu { | ||
|
||
using ACLMemoryInfo = std::shared_ptr<arm_compute::TensorInfo>; | ||
using ACLMemory = std::shared_ptr<arm_compute::Tensor>; | ||
using ACLMemoryMap = std::unordered_map<int, ACLMemory>; | ||
using ACLFunction = std::unique_ptr<arm_compute::IFunction>; | ||
|
||
struct ACLTensorAttrs { | ||
bool hasLayoutTypeNHWC = false; | ||
size_t maxDimsShape = arm_compute::MAX_DIMS; | ||
}; | ||
|
||
class ACLCommonExecutor : public Executor { | ||
public: | ||
virtual arm_compute::Status updateTensorsInfo(const ACLMemoryMap& acl_memory) { | ||
OPENVINO_THROW_NOT_IMPLEMENTED("This version of the 'updateTensorsInfo' method is not implemented by executor"); | ||
} | ||
virtual ACLFunction configureFunction(const ACLMemoryMap& acl_memory) { | ||
OPENVINO_THROW_NOT_IMPLEMENTED("This version of the 'configureFunction' method is not implemented by executor"); | ||
} | ||
impl_desc_type implType() const override { | ||
return impl_desc_type::acl; | ||
} | ||
void execute(const MemoryArgs& memory) final; | ||
bool update(const MemoryArgs& memory) final; | ||
~ACLCommonExecutor(); | ||
|
||
protected: | ||
ACLTensorAttrs aclTensorAttrs; | ||
|
||
private: | ||
ACLMemoryMap aclMemoryMap; | ||
ACLFunction iFunction = nullptr; | ||
static ACLMemoryInfo initTensorInfo(const MemoryPtr& memoryPtr, ACLTensorAttrs attrs); | ||
static ACLMemory initTensor(const ACLMemoryInfo& aclMemoryInfo); | ||
}; | ||
|
||
using ACLCommonExecutorPtr = std::shared_ptr<ACLCommonExecutor>; | ||
|
||
} // namespace intel_cpu | ||
} // namespace ov |
92 changes: 92 additions & 0 deletions
92
src/plugins/intel_cpu/src/nodes/executors/acl/acl_fullyconnected.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
// Copyright (C) 2024 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#include "acl_fullyconnected.hpp" | ||
#include "acl_utils.hpp" | ||
#include "nodes/executors/executor.hpp" | ||
#include "nodes/executors/memory_arguments.hpp" | ||
#include "utils/debug_capabilities.h" | ||
#include "nodes/executors/debug_messages.hpp" | ||
#include "nodes/executors/implementation_utils.hpp" | ||
|
||
namespace ov { | ||
namespace intel_cpu { | ||
|
||
/**
 * @brief Captures the fully-connected settings needed by the ACL layer.
 *
 * @param attrs   FC attributes; weightsNonTransposed is read to derive transpose_weights.
 * @param postOps Post operations; only a single ActivationPostOp is translated.
 * @param memory  Memory arguments; ARG_SRC and ARG_WEI are inspected.
 * @param context Executor context (not used in this constructor).
 */
ACLFullyConnectedExecutor::ACLFullyConnectedExecutor(const FCAttrs &attrs, const PostOps &postOps,
                                                     const MemoryArgs &memory,
                                                     const ExecutorContext::CPtr context) {
    const auto& srcMemory = memory.at(ARG_SRC);
    aclTensorAttrs.hasLayoutTypeNHWC = srcMemory->getDescPtr()->hasLayoutType(LayoutType::nspc);
    fullyConnectedLayerInfo.weights_trained_layout = getAclDataLayoutByMemoryDesc(memory.at(ARG_WEI)->getDescPtr());
    fullyConnectedLayerInfo.transpose_weights = !attrs.weightsNonTransposed;
    if (srcMemory->getPrecision() == ov::element::f16) {
        fullyConnectedLayerInfo.fp_mixed_precision = true;
    }

    // Add postops: exactly one post op, and only if it is an activation
    if (postOps.size() == 1) {
        const auto activation = std::dynamic_pointer_cast<ActivationPostOp>(postOps.front());
        if (activation) {
            fullyConnectedLayerInfo.activation_info = getActivationLayerInfo(convertToEltwiseAlgorithm(activation->type()),
                                                                             activation->alpha(),
                                                                             activation->beta(),
                                                                             activation->gamma());
        }
    }
}
|
||
bool ACLFullyConnectedExecutor::supports(const FCConfig &config) { | ||
VERIFY(one_of(srcType(config), ov::element::f16, ov::element::f32), UNSUPPORTED_SRC_PRECISIONS); | ||
VERIFY(postOpsNumbers(config) < 2, UNSUPPORTED_NUMBER_OF_POSTOPS); | ||
VERIFY(one_of(srcRank(config), 2U, 3U, 4U), UNSUPPORTED_SRC_RANK); | ||
VERIFY(one_of(weiRank(config), 2U, 3U), UNSUPPORTED_SRC_RANK); | ||
return true; | ||
} | ||
|
||
/**
 * @brief Reshapes the source, weights and destination tensor infos to 2D where needed and
 *        validates them against arm_compute::NEFullyConnectedLayer.
 *
 * The steps are order dependent: the source reshape reads the (possibly reshaped) weights
 * shape, and the destination reshape reads both the weights and the source shapes.
 *
 * @param acl_memory ACL tensors keyed by argument id; ARG_SRC, ARG_WEI, ARG_BIAS and ARG_DST
 *                   are accessed with at(). The ARG_BIAS tensor may be null.
 * @return Status returned by NEFullyConnectedLayer::validate().
 */
arm_compute::Status ACLFullyConnectedExecutor::updateTensorsInfo(const ACLMemoryMap& acl_memory) {
    // Weights: fold a 3D shape into 2D by multiplying the first two dimensions.
    auto* weiInfo = acl_memory.at(ARG_WEI)->info();
    auto initialWeiShape = weiInfo->tensor_shape();
    if (initialWeiShape.num_dimensions() == 3U) {
        weiInfo->set_tensor_shape({initialWeiShape[0] * initialWeiShape[1], initialWeiShape[2]});
    }

    // Source: a 3D/4D shape becomes {weights dim 0, total element count / weights dim 0}.
    auto* srcInfo = acl_memory.at(ARG_SRC)->info();
    auto initialSrcShape = srcInfo->tensor_shape();
    if (one_of(initialSrcShape.num_dimensions(), 3U, 4U)) {
        srcInfo->set_tensor_shape({
            weiInfo->tensor_shape()[0],
            initialSrcShape.total_size() / weiInfo->tensor_shape()[0]});
    }

    // Destination: a 3D/4D shape becomes {weights dim 1, source dim 1}.
    auto* dstInfo = acl_memory.at(ARG_DST)->info();
    if (one_of(dstInfo->tensor_shape().num_dimensions(), 3U, 4U)) {
        dstInfo->set_tensor_shape({
            weiInfo->tensor_shape()[1],
            srcInfo->tensor_shape()[1]});
    }

    // When ACL is told not to transpose the weights, swap the first two weight dimensions.
    if (!fullyConnectedLayerInfo.transpose_weights) {
        arm_compute::TensorShape swappedWeiShape = weiInfo->tensor_shape();
        std::swap(swappedWeiShape[0], swappedWeiShape[1]);
        weiInfo->set_tensor_shape(swappedWeiShape);
    }

    const auto& biasTensor = acl_memory.at(ARG_BIAS);
    return arm_compute::NEFullyConnectedLayer::validate(
        srcInfo,
        weiInfo,
        biasTensor ? biasTensor->info() : nullptr,
        dstInfo,
        fullyConnectedLayerInfo,
        weightsInfo);
}
|
||
/**
 * @brief Creates an arm_compute::NEFullyConnectedLayer and configures it with the ACL tensors.
 *
 * @param acl_memory ACL tensors keyed by argument id; ARG_SRC, ARG_WEI, ARG_BIAS and ARG_DST
 *                   are accessed with at() and passed as raw pointers.
 * @return The configured layer.
 */
ACLFunction ACLFullyConnectedExecutor::configureFunction(const ACLMemoryMap& acl_memory) {
    auto* srcTensor = acl_memory.at(ARG_SRC).get();
    auto* weiTensor = acl_memory.at(ARG_WEI).get();
    auto* biasTensor = acl_memory.at(ARG_BIAS).get();
    auto* dstTensor = acl_memory.at(ARG_DST).get();

    auto fullyConnected = std::make_unique<arm_compute::NEFullyConnectedLayer>();
    fullyConnected->configure(srcTensor,
                              weiTensor,
                              biasTensor,
                              dstTensor,
                              fullyConnectedLayerInfo,
                              weightsInfo);
    return fullyConnected;
}
|
||
} // namespace intel_cpu | ||
} // namespace ov |
Oops, something went wrong.