From b45875d605d78ad37776285199875c4262bbbfb7 Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Fri, 12 May 2023 16:51:47 +0200 Subject: [PATCH 01/28] InPlace memory direction resolution pass --- src/plugins/intel_cpu/src/graph.cpp | 107 ++++++++++++++++++++++++++++ src/plugins/intel_cpu/src/graph.h | 1 + 2 files changed, 108 insertions(+) diff --git a/src/plugins/intel_cpu/src/graph.cpp b/src/plugins/intel_cpu/src/graph.cpp index ce54632f8ec662..9e02f2be456910 100644 --- a/src/plugins/intel_cpu/src/graph.cpp +++ b/src/plugins/intel_cpu/src/graph.cpp @@ -356,6 +356,13 @@ void Graph::InitGraph() { InitDescriptors(); + //TODO: move in a separate method + { + for (auto& node : graphNodes) { + resolveInPlaceDirection(node); + } + } + InitOptimalPrimitiveDescriptors(); InitEdges(); @@ -1571,6 +1578,7 @@ bool Graph::InsertNode(NodePtr parent, NodePtr child, NodePtr node, int parentPo node->initSupportedPrimitiveDescriptors(); node->filterSupportedPrimitiveDescriptors(); node->selectOptimalPrimitiveDescriptor(); + resolveInPlaceDirection(node); node->initOptimalPrimitiveDescriptor(); } @@ -1651,5 +1659,104 @@ std::shared_ptr Graph::dump() const { return dump_graph_as_ie_ngraph_net(*this); } +void Graph::resolveInPlaceDirection(const NodePtr& node) const { + enum InplaceDirectionType {UP, DOWN, CYCLIC, NONE}; + enum PortType {INPUT, OUTPUT}; + + auto inPlaceDirection = [](const NodePtr& node, PortType portType, int portNum) -> InplaceDirectionType { + if (PortType::INPUT == portType) { + auto inPlaceInpPort = node->inPlaceInputPort(portNum); + if (inPlaceInpPort >= 0) { + auto inPlaceOutPort = node->inPlaceOutPort(inPlaceInpPort); + if (inPlaceOutPort == inPlaceInpPort) { + return InplaceDirectionType::CYCLIC; + } else if (inPlaceOutPort < 0) { + return InplaceDirectionType::DOWN; + } else { + IE_THROW() << "Non trivial inPlace memory dependency has been detected"; + } + } + // the requested port has a negative inPlace tag, let's check whether it is referenced from the output + auto& config = node->getSelectedPrimitiveDescriptor()->getConfig(); + for (auto& portConf : config.outConfs) { + if (portConf.inPlace() == portNum) { + return InplaceDirectionType::UP; + } + } + } else if (PortType::OUTPUT == portType) { + auto inPlaceOutPort = node->inPlaceOutPort(portNum); + if (inPlaceOutPort >= 0) { + auto inPlaceInpPort = node->inPlaceInputPort(inPlaceOutPort); + if (inPlaceOutPort == inPlaceInpPort) { + return InplaceDirectionType::CYCLIC; + } else if (inPlaceInpPort < 0) { + return InplaceDirectionType::UP; + } else { + IE_THROW() << "Non trivial inPlace memory dependency has been detected"; + } + } + // the requested port has a negative inPlace tag, let's check whether it is referenced from the input + auto& config = node->getSelectedPrimitiveDescriptor()->getConfig(); + for (auto& portConf : config.inConfs) { + if (portConf.inPlace() == portNum) { + return InplaceDirectionType::DOWN; + } + } + } + return InplaceDirectionType::NONE; + }; + + auto& inpEdges = node->getParentEdges(); + for (auto& wEdge : inpEdges) { + if (auto pEdge = wEdge.lock()) { + auto inpPort = pEdge->getOutputNum(); + auto inPlaceInpPort = node->inPlaceInputPort(inpPort); + if (inPlaceInpPort >= 0 && inPlaceDirection(node, PortType::INPUT, inpPort) == InplaceDirectionType::CYCLIC) { + // inPlace memory cyclic dependency detected, need to resolve + // let's check the parent node first + auto pParent = pEdge->getParent(); + auto parentInPlaceDirection = inPlaceDirection(pParent, PortType::OUTPUT, pEdge->getInputNum()); + if 
(parentInPlaceDirection == InplaceDirectionType::UP) { + auto config = node->getSelectedPrimitiveDescriptor()->getConfig(); + config.inConfs[inpPort].inPlace(-1); + node->initDescriptor(config); + } else if (parentInPlaceDirection == InplaceDirectionType::DOWN) { + auto config = node->getSelectedPrimitiveDescriptor()->getConfig(); + config.outConfs[inPlaceInpPort].inPlace(-1); + node->initDescriptor(config); + } else { + // the parent node does not use inPlace memory, let's check children + std::function searchNonCyclicDirection; + searchNonCyclicDirection = [&](const NodePtr& node, int portIdx) -> InplaceDirectionType { + auto& childEdges = node->getChildEdgesAtPort(portIdx); + for (auto& edge : childEdges) { + auto pChild = edge->getChild(); + auto result = inPlaceDirection(pChild, PortType::INPUT, edge->getOutputNum()); + if (InplaceDirectionType::UP == result || InplaceDirectionType::DOWN == result) { + return result; + } else if (InplaceDirectionType::CYCLIC == result) { + return searchNonCyclicDirection(pChild, pChild->inPlaceInputPort(edge->getOutputNum())); + } + } + return InplaceDirectionType::NONE; + }; + auto result = searchNonCyclicDirection(node, inPlaceInpPort); + if (one_of(result, InplaceDirectionType::UP, InplaceDirectionType::NONE)) { + auto config = node->getSelectedPrimitiveDescriptor()->getConfig(); + config.inConfs[inpPort].inPlace(-1); + node->initDescriptor(config); + } else if (InplaceDirectionType::DOWN == result) { + auto config = node->getSelectedPrimitiveDescriptor()->getConfig(); + config.outConfs[inPlaceInpPort].inPlace(-1); + node->initDescriptor(config); + } else { + IE_THROW() << "A node without an inPlace memory cyclic dependency has not been found"; + } + } + } + } + } +} + } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/graph.h b/src/plugins/intel_cpu/src/graph.h index 1d0129785b811b..75f56c1688bf54 100644 --- a/src/plugins/intel_cpu/src/graph.h +++ b/src/plugins/intel_cpu/src/graph.h @@ -271,6 +271,7 @@ class Graph { int dynBatch = -1; void EnforceBF16(); + void resolveInPlaceDirection(const NodePtr& node) const; }; } // namespace intel_cpu From c20329b7dfeca3c2fd9edd90996af4ec0da8d228 Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Fri, 12 May 2023 16:57:11 +0200 Subject: [PATCH 02/28] Partitioned mem mngr --- src/plugins/intel_cpu/src/cpu_memory.cpp | 4 +- src/plugins/intel_cpu/src/cpu_memory.h | 30 ++-- src/plugins/intel_cpu/src/dnnl_scratch_pad.h | 2 +- src/plugins/intel_cpu/src/edge.cpp | 135 ++++++------------ src/plugins/intel_cpu/src/edge.h | 2 +- src/plugins/intel_cpu/src/node.cpp | 37 ++++- src/plugins/intel_cpu/src/node.h | 5 +- .../intel_cpu/src/partitioned_mem_mgr.cpp | 67 +++++++++ .../intel_cpu/src/partitioned_mem_mgr.h | 36 +++++ 9 files changed, 207 insertions(+), 111 deletions(-) create mode 100644 src/plugins/intel_cpu/src/partitioned_mem_mgr.cpp create mode 100644 src/plugins/intel_cpu/src/partitioned_mem_mgr.h diff --git a/src/plugins/intel_cpu/src/cpu_memory.cpp b/src/plugins/intel_cpu/src/cpu_memory.cpp index 62d528315932b8..87fad83cf1b90b 100644 --- a/src/plugins/intel_cpu/src/cpu_memory.cpp +++ b/src/plugins/intel_cpu/src/cpu_memory.cpp @@ -135,11 +135,11 @@ void Memory::update() { } } -void Memory::Create(const MemoryDesc &desc, DnnlMemoryMngrPtr memMgr) { +void Memory::Create(const MemoryDesc &desc, MemoryMngrPtr memMgr) { Create(desc.clone(), memMgr); } -void Memory::Create(MemoryDescPtr desc, DnnlMemoryMngrPtr memMgr) { +void Memory::Create(MemoryDescPtr desc, MemoryMngrPtr memMgr) { 
mgrHandle = DnnlMemMngrHandle(memMgr, this); bool memAllocated = mgrHandle->getRawPtr(); diff --git a/src/plugins/intel_cpu/src/cpu_memory.h b/src/plugins/intel_cpu/src/cpu_memory.h index a49c30eee420a7..ab6d9cbff1e530 100644 --- a/src/plugins/intel_cpu/src/cpu_memory.h +++ b/src/plugins/intel_cpu/src/cpu_memory.h @@ -89,18 +89,24 @@ class MemoryMngrWithReuse : public IMemoryMngr { static void destroy(void *ptr); }; +class IMemoryMngrObserver : public IMemoryMngr { +public: + virtual void registerMemory(Memory* memPtr) = 0; + virtual void unregisterMemory(Memory* memPtr) = 0; +}; + /** * @brief A proxy object that additionally implements observer pattern */ -class DnnlMemoryMngr : public IMemoryMngr { +class DnnlMemoryMngr : public IMemoryMngrObserver { public: explicit DnnlMemoryMngr(std::unique_ptr mngr) : _pMemMngr(std::move(mngr)) {} void* getRawPtr() const noexcept override; void setExtBuff(void* ptr, size_t size) override; bool resize(size_t size) override; bool hasExtBuffer() const noexcept override; - void registerMemory(Memory* memPtr); - void unregisterMemory(Memory* memPtr); + void registerMemory(Memory* memPtr) override; + void unregisterMemory(Memory* memPtr) override; private: void notifyUpdate(); @@ -110,12 +116,12 @@ class DnnlMemoryMngr : public IMemoryMngr { std::unique_ptr _pMemMngr; }; -using DnnlMemoryMngrPtr = std::shared_ptr; -using DnnlMemoryMngrCPtr = std::shared_ptr; +using MemoryMngrPtr = std::shared_ptr; +using MemoryMngrCPtr = std::shared_ptr; class DnnlMemMngrHandle { public: - DnnlMemMngrHandle(DnnlMemoryMngrPtr pMgr, Memory* pMem) : _pMgr(pMgr), _pMem(pMem) { + DnnlMemMngrHandle(MemoryMngrPtr pMgr, Memory* pMem) : _pMgr(pMgr), _pMem(pMem) { if (_pMgr) { _pMgr->registerMemory(_pMem); } @@ -140,16 +146,16 @@ class DnnlMemMngrHandle { } } - DnnlMemoryMngrPtr get() const { + MemoryMngrPtr get() const { return _pMgr; } - DnnlMemoryMngrPtr::element_type* operator->() const noexcept { + MemoryMngrPtr::element_type* operator->() const noexcept { return _pMgr.get(); } private: - DnnlMemoryMngrPtr _pMgr = nullptr; + MemoryMngrPtr _pMgr = nullptr; Memory* _pMem = nullptr; }; @@ -233,8 +239,8 @@ class Memory { void Create(const MemoryDesc& desc, const void* data = nullptr, bool pads_zeroing = true); void Create(MemoryDescPtr desc, const void* data = nullptr, bool pads_zeroing = true); - void Create(const MemoryDesc& desc, DnnlMemoryMngrPtr memMgr); - void Create(MemoryDescPtr desc, DnnlMemoryMngrPtr memMgr); + void Create(const MemoryDesc& desc, MemoryMngrPtr memMgr); + void Create(MemoryDescPtr desc, MemoryMngrPtr memMgr); // Redefines descriptor. The memory descriptor will be replaced with the new one. // Memory will not be reallocated if the new tensor size is less or equal the upper bound. 
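As a usage note for the interface generalization above (MemoryMngrPtr now aliases the new IMemoryMngrObserver base, and Memory::Create accepts any such manager), the sketch below shows two Memory objects sharing one manager, which is the same pattern Edge::getMemoryPtr applies to its shared edge later in this patch. It is only a sketch: eng and desc are placeholders for an existing dnnl::engine and memory descriptor and do not come from the patch.

    // Sketch only, not part of the patch: alias one buffer from two Memory objects.
    Memory src(eng);                         // 'eng' is an assumed dnnl::engine
    src.Create(desc);                        // 'desc' is an assumed MemoryDescPtr; allocates via the default manager

    Memory view(eng);
    view.Create(desc, src.getMemoryMngr());  // reuse src's buffer through the shared manager
    // A resize performed through the shared manager is reported back to every Memory
    // object registered with it (registerMemory/unregisterMemory in IMemoryMngrObserver).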
@@ -256,7 +262,7 @@ class Memory { return mgrHandle->hasExtBuffer(); } - DnnlMemoryMngrPtr getDnnlMemoryMngr() const { + MemoryMngrPtr getMemoryMngr() const { return mgrHandle.get(); } diff --git a/src/plugins/intel_cpu/src/dnnl_scratch_pad.h b/src/plugins/intel_cpu/src/dnnl_scratch_pad.h index 79157ec5bbd709..390cf363f3e1e8 100644 --- a/src/plugins/intel_cpu/src/dnnl_scratch_pad.h +++ b/src/plugins/intel_cpu/src/dnnl_scratch_pad.h @@ -14,7 +14,7 @@ namespace ov { namespace intel_cpu { class DnnlScratchPad { - DnnlMemoryMngrPtr mgrPtr; + MemoryMngrPtr mgrPtr; dnnl::engine eng; public: diff --git a/src/plugins/intel_cpu/src/edge.cpp b/src/plugins/intel_cpu/src/edge.cpp index 73e76566678c31..21fff04b955b08 100644 --- a/src/plugins/intel_cpu/src/edge.cpp +++ b/src/plugins/intel_cpu/src/edge.cpp @@ -82,7 +82,6 @@ void Edge::collectConsumers(std::vector& result) const { } bool Edge::enforceReorder() { - bool canBeInPlaceConflicts = false; auto parentNode = getParent(); auto parentSPD = parentNode->getSelectedPrimitiveDescriptor(); auto childNode = getChild(); @@ -90,34 +89,41 @@ bool Edge::enforceReorder() { if (!parentSPD || !childSPD) IE_THROW() << "Cannot make a decision about reorder. Primitive descriptors weren't selected."; - auto childCanChangeMem = [](const Edge& edge) { + bool in_place = inPlace(); + + if (in_place) { + if (inPlace(LOOK_DOWN) && inPlace(LOOK_UP)) { + return true; + } + } + + auto childCanModifyMem = [](const Edge& edge) { bool result = false; int outNumber = edge.getOutputNum(); if (auto childSPD = edge.getChild()->getSelectedPrimitiveDescriptor()) { result = childSPD->getConfig().outConfs.empty(); for (const auto& conf : childSPD->getConfig().outConfs) { - if (conf.inPlace() == outNumber && outNumber >= 0) - result = true; + if (outNumber >= 0 && conf.inPlace() == outNumber && edge.getChild()->isExecutable()) + return true; } } return result; }; - const auto& detectInPlaceChildrenNum = [&childCanChangeMem](const std::vector& edges) -> size_t { + const auto& detectInPlaceChildrenNum = [&childCanModifyMem](const std::vector& edges) -> size_t { size_t count = 0; for (const auto& edge : edges) { - if (childCanChangeMem(*edge)) { + if (childCanModifyMem(*edge)) { count++; } } return count; }; - bool in_place = inPlace(); int inNumber = getInputNum(); const auto portChildEdges = parentNode->getChildEdgesAtPort(inNumber); - if (childCanChangeMem(*this) && portChildEdges.size() > 1) { + if (childCanModifyMem(*this) && portChildEdges.size() > 1) { if (childNode->getType() == Type::Convolution) { auto execIndex = childNode->getExecIndex(); for (auto pEdgePeer : portChildEdges) { @@ -128,40 +134,25 @@ bool Edge::enforceReorder() { for (auto node : vecConsumers) { if (node->getExecIndex() >= execIndex) { - canBeInPlaceConflicts = true; - break; + return true; } } - if (canBeInPlaceConflicts) break; } } else if (in_place && detectInPlaceChildrenNum(portChildEdges) > 1) { - canBeInPlaceConflicts = true; + return true; } } - if (!canBeInPlaceConflicts && in_place && !parentNode->getChildEdges().empty()) { - for (auto& p_edge_peer : portChildEdges) { - if (p_edge_peer.get() == this) - continue; - if (p_edge_peer->getChild()->getType() != Type::Reorder && p_edge_peer->inPlace(LOOK_DOWN)) { - canBeInPlaceConflicts = true; - break; - } - } - } - - if (in_place) { - int outNumber = getOutputNum(); - if (inNumber >= 0 && static_cast(inNumber) < parentSPD->getConfig().outConfs.size() && - parentSPD->getConfig().outConfs[inNumber].inPlace() >= 0 && outNumber >= 0 && - static_cast(outNumber) 
< childSPD->getConfig().inConfs.size() && - childSPD->getConfig().inConfs[outNumber].inPlace() >= 0) - canBeInPlaceConflicts = true; - } - - if (canBeInPlaceConflicts) { - return true; - } + // if (!canBeInPlaceConflicts && in_place && !parentNode->getChildEdges().empty()) { + // for (auto& p_edge_peer : portChildEdges) { + // if (p_edge_peer.get() == this) + // continue; + // if (p_edge_peer->getChild()->getType() != Type::Reorder && p_edge_peer->inPlace(LOOK_DOWN)) { + // canBeInPlaceConflicts = true; + // break; + // } + // } + // } // In case the parent node is an input constant, the memory is unaligned and the child primitive isa is SSE, // we have to insert reorder since the vast majority of arithmetic and data processing instructions in legacy SSE isa requires @@ -331,7 +322,7 @@ void Edge::allocate(const void* mem_ptr) { allocateCommon(allocateFunc); } -void Edge::allocate(DnnlMemoryMngrPtr memMngr) { +void Edge::allocate(MemoryMngrPtr memMngr) { if (!memMngr) { IE_THROW(Unexpected) << "Memory manager ptr is NULL"; } @@ -477,7 +468,7 @@ MemoryPtr &Edge::getMemoryPtr() { memoryPtr->Create(desc, sharedEdge->getMemoryPtr()->GetData()); DEBUG_LOG(*this, " const sharedEdge with ", *sharedEdge); } else { - memoryPtr->Create(desc, sharedEdge->getMemoryPtr()->getDnnlMemoryMngr()); + memoryPtr->Create(desc, sharedEdge->getMemoryPtr()->getMemoryMngr()); DEBUG_LOG(*this, " sharedEdge with ", *sharedEdge); } memoryFromEdge.reset(); @@ -559,91 +550,55 @@ void Edge::init() { * @return root of view-on-memory subgraph */ EdgePtr Edge::getBaseEdge(int look) { - auto parentConfig = getParent()->getSelectedPrimitiveDescriptor()->getConfig(); - auto childConfig = getChild()->getSelectedPrimitiveDescriptor()->getConfig(); - int inputNum = getInputNum(); - int outputNum = getOutputNum(); + const int inputNum = getInputNum(); + const int outputNum = getOutputNum(); - if (childConfig.inConfs[outputNum].inPlace() >= 0 && parentConfig.outConfs[inputNum].inPlace() >= 0) { - // in case of parentConfig requiring upstream-inplace and childConfig supports downstream-inplace - // must further check whether childConfig also supports upstream inplace, - // if so, we can safely inplace as upstream - auto down_stream_inplace = childConfig.inConfs[outputNum].inPlace(); - int up_stream_inplace = -1; - if (down_stream_inplace >= 0) - up_stream_inplace = childConfig.outConfs[down_stream_inplace].inPlace(); - - if ((up_stream_inplace >= 0) && (look & LOOK_UP)) { - look = LOOK_UP; - } else { - DEBUG_LOG(*this, " Danger: Inplace assumption will be broken!"); - inputNum = getInputNum(); - return getParent()->getChildEdgeAt(inputNum); - } - } + const int parentInPlacePort = getParent()->inPlaceOutPort(inputNum); + const int childInPlacePort = getChild()->inPlaceInputPort(outputNum); - if (childConfig.inConfs[outputNum].inPlace() >= 0 && (look & LOOK_DOWN)) { - int next_port_idx = childConfig.inConfs[outputNum].inPlace(); - if (childConfig.outConfs[next_port_idx].inPlace() >= 0) { - childConfig.outConfs[next_port_idx].inPlace(-1); - getChild()->initDescriptor(childConfig); - } + IE_ASSERT(!(parentInPlacePort >=0 && childInPlacePort >= 0)) << + "Unresolved in place memory conflict detected on edge: " << name(); - auto ch_edges = getChild()->getChildEdgesAtPort(next_port_idx); + if ((childInPlacePort >= 0) && (look & LOOK_DOWN)) { + auto ch_edges = getChild()->getChildEdgesAtPort(childInPlacePort); auto &next_ch_edge = ch_edges[0]; // Multiple connection to some out port // Will try to find inplace consumer for (auto &ch_edge : 
ch_edges) { - auto &chch_conf = ch_edge->getChild()->getSelectedPrimitiveDescriptor()->getConfig(); - - if (chch_conf.inConfs[ch_edge->getOutputNum()].inPlace() >= 0) { + if (ch_edge->getChild()->inPlaceInputPort(ch_edge->getOutputNum()) >= 0) { next_ch_edge = ch_edge; // To align with upstream-inplace, we stop searching once found the first inplace consumer break; } } return next_ch_edge->getBaseEdge(LOOK_DOWN); - } else if (parentConfig.outConfs[inputNum].inPlace() >= 0 && (look & LOOK_UP)) { - int next_port_idx = parentConfig.outConfs[inputNum].inPlace(); - if (parentConfig.inConfs[next_port_idx].inPlace() >= 0) { - parentConfig.inConfs[next_port_idx].inPlace(-1); - getParent()->initDescriptor(parentConfig); - } - return getParent()->getParentEdgesAtPort(next_port_idx)[0]->getBaseEdge(LOOK_UP); + } else if (parentInPlacePort >= 0 && (look & LOOK_UP)) { + return getParent()->getParentEdgesAtPort(parentInPlacePort)[0]->getBaseEdge(LOOK_UP); } - auto edges_for_same_port = getParent()->getChildEdgesAtPort(inputNum); + auto edgesForSamePort = getParent()->getChildEdgesAtPort(inputNum); if (!(look & LOOK_NO_RECURRENT)) { - for (auto edge : edges_for_same_port) { + for (auto edge : edgesForSamePort) { if (edge.get() != this) { auto base = edge->getBaseEdge(LOOK_BOTH | LOOK_NO_RECURRENT); // Return once found the first inplace consumer - if (base != edge && base != edges_for_same_port[0]) return base; + if (base != edge && base != edgesForSamePort[0]) return base; } } } - return edges_for_same_port[0]; + return edgesForSamePort[0]; } bool Edge::inPlace(LOOK look) const { - auto parentSPD = getParent()->getSelectedPrimitiveDescriptor(); - auto childSPD = getChild()->getSelectedPrimitiveDescriptor(); - if (!parentSPD || !childSPD) - IE_THROW() << "Cannot make a decision about reorder. 
Primitive descriptors weren't selected."; int inputNum = getInputNum(); int outputNum = getOutputNum(); - if (inputNum >= static_cast(parentSPD->getConfig().outConfs.size())) - inputNum = 0; - if (outputNum >= static_cast(childSPD->getConfig().inConfs.size())) - outputNum = 0; - if (look & LOOK_UP) { - if (parentSPD->getConfig().outConfs[inputNum].inPlace() >= 0) + if (getParent()->inPlaceOutPort(inputNum) >= 0) return true; } if (look & LOOK_DOWN) { - if (childSPD->getConfig().inConfs[outputNum].inPlace() >= 0) + if (getChild()->inPlaceInputPort(outputNum) >= 0) return true; } return false; diff --git a/src/plugins/intel_cpu/src/edge.h b/src/plugins/intel_cpu/src/edge.h index 0292f9767f46f3..d1a8d227dd3fc3 100644 --- a/src/plugins/intel_cpu/src/edge.h +++ b/src/plugins/intel_cpu/src/edge.h @@ -51,7 +51,7 @@ class Edge { void init(); void allocate(const void* mem_ptr = nullptr); - void allocate(DnnlMemoryMngrPtr memMngr); + void allocate(MemoryMngrPtr memMngr); void externalAllocate(WeightsSharing::Ptr weightsCache); void reuse(MemoryPtr ptr); void validate(); diff --git a/src/plugins/intel_cpu/src/node.cpp b/src/plugins/intel_cpu/src/node.cpp index 62e62dd3d3eeec..f9317f92584666 100644 --- a/src/plugins/intel_cpu/src/node.cpp +++ b/src/plugins/intel_cpu/src/node.cpp @@ -5,6 +5,7 @@ #include "node.h" #include "edge.h" #include "extension_mngr.h" +#include "partitioned_mem_mgr.h" #include "itt.h" #include "caseless.hpp" @@ -370,11 +371,13 @@ void Node::resolveInPlaceEdges() { IE_THROW() << "Cannot find selected primitive descriptor for node: " << getName(); for (size_t i = 0; i < getParentEdges().size() && i < selected_pd->getConfig().inConfs.size(); i++) { auto parentEdge = getParentEdgeAt(i); + auto inplaceOutIndx = selected_pd->getConfig().inConfs[i].inPlace(); - if (parentEdge->getStatus() != Edge::Status::NotAllocated || selected_pd->getConfig().inConfs[i].inPlace() < 0) + if (inplaceOutIndx < 0) //parentEdge->getStatus() != Edge::Status::NotAllocated || continue; - auto memMgr = parentEdge->getMemory().getDnnlMemoryMngr(); + auto childEdge = getChildEdgesAtPort(inplaceOutIndx).front(); + auto memMgr = std::make_shared(childEdge); parentEdge->getMemoryPtr().reset(new Memory(getEngine())); parentEdge->getMemoryPtr()->Create(selected_pd->getConfig().inConfs[i].getMemDesc(), memMgr); @@ -382,11 +385,13 @@ void Node::resolveInPlaceEdges() { } for (size_t i = 0; i < getChildEdges().size() && i < selected_pd->getConfig().outConfs.size(); i++) { auto childEdge = getChildEdgeAt(i); + auto inplaceInpIndx = selected_pd->getConfig().outConfs[i].inPlace(); - if (childEdge->getStatus() != Edge::Status::NotAllocated || selected_pd->getConfig().outConfs[i].inPlace() < 0) + if (inplaceInpIndx < 0) //childEdge->getStatus() != Edge::Status::NotAllocated || continue; - auto memMgr = childEdge->getMemory().getDnnlMemoryMngr(); + auto parentEdge = getParentEdgesAtPort(inplaceInpIndx).front(); + auto memMgr = std::make_shared(parentEdge); childEdge->getMemoryPtr().reset(new Memory(getEngine())); childEdge->getMemoryPtr()->Create(selected_pd->getConfig().outConfs[i].getMemDesc(), memMgr); @@ -1645,5 +1650,29 @@ void Node::initializeDQScales(const float* scaleData, const size_t scaleSize) { DQScales.resize(1); } +int Node::inPlaceInputPort(int portIdx) const { + const NodeDesc *selected_pd = getSelectedPrimitiveDescriptor(); + if (!selected_pd) + IE_THROW() << "Cannot find selected primitive descriptor for node: " << getName(); + + const auto& conf = selected_pd->getConfig(); + + IE_ASSERT(portIdx >= 0 && 
portIdx < conf.inConfs.size()) << + "Wrong portIndx: " << portIdx << " acceptable interval: [0, " << conf.inConfs.size() << ")"; + + return conf.inConfs[portIdx].inPlace(); +} +int Node::inPlaceOutPort(int portIdx) const { + const NodeDesc *selected_pd = getSelectedPrimitiveDescriptor(); + if (!selected_pd) + IE_THROW() << "Cannot find selected primitive descriptor for node: " << getName(); + + const auto& conf = selected_pd->getConfig(); + + IE_ASSERT(portIdx >= 0 && portIdx < conf.outConfs.size()) << + "Wrong portIndx: " << portIdx << " acceptable interval: [0, " << conf.outConfs.size() << ")"; + + return conf.outConfs[portIdx].inPlace(); +} } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/node.h b/src/plugins/intel_cpu/src/node.h index 96d3f42aca9aaf..38b70e2cf04b05 100644 --- a/src/plugins/intel_cpu/src/node.h +++ b/src/plugins/intel_cpu/src/node.h @@ -192,6 +192,9 @@ class Node { const std::vector getParentEdgesAtPort(size_t idx) const; const std::vector getChildEdgesAtPort(size_t idx) const; + int inPlaceInputPort(int portIdx) const; + int inPlaceOutPort(int portIdx) const; + bool isDropped() { return (isEdgesEmpty(childEdges) && isEdgesEmpty(parentEdges)); } @@ -358,7 +361,7 @@ class Node { PerfCount &PerfCounter() { return perfCounter; } - void resolveInPlaceEdges(); + virtual void resolveInPlaceEdges(); virtual void execute(dnnl::stream strm) = 0; void updateShapes(); diff --git a/src/plugins/intel_cpu/src/partitioned_mem_mgr.cpp b/src/plugins/intel_cpu/src/partitioned_mem_mgr.cpp new file mode 100644 index 00000000000000..d462e3a9597234 --- /dev/null +++ b/src/plugins/intel_cpu/src/partitioned_mem_mgr.cpp @@ -0,0 +1,67 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "partitioned_mem_mgr.h" + +using namespace ov::intel_cpu; + +MemoryMngrPtr PartitionedMemoryMngr::sourceMemMngrNoThrow() const noexcept { + if (auto pEdge = m_wEdge.lock()) { + MemoryPtr pMem = nullptr; + try { + pMem = pEdge->getMemoryPtr(); + } + catch(...) 
{ + return nullptr; + } + if (pMem) { + if (auto memMngr = pMem->getMemoryMngr()) { + return memMngr; + } + } + } + return nullptr; +} + +MemoryMngrPtr PartitionedMemoryMngr::sourceMemMngr() const { + auto memMngr = sourceMemMngrNoThrow(); + IE_ASSERT(memMngr != nullptr) << "PartitionedMemoryMngr references nullptr"; + return memMngr; +} + +void* PartitionedMemoryMngr::getRawPtr() const noexcept { + if (auto memMngr = sourceMemMngrNoThrow()) { + return static_cast(memMngr->getRawPtr()) + m_offset_blocks * m_size; + } + return nullptr; +} + +void PartitionedMemoryMngr::setExtBuff(void* ptr, size_t size) { + auto memMngr = sourceMemMngr(); + memMngr->setExtBuff(ptr, size); +} + +bool PartitionedMemoryMngr::resize(size_t size) { + auto memMngr = sourceMemMngr(); + m_size = size; + return memMngr->resize(size * m_part); +} + +bool PartitionedMemoryMngr::hasExtBuffer() const noexcept { + if (auto memMngr = sourceMemMngrNoThrow()) { + return memMngr->hasExtBuffer(); + } + return false; +} + +void PartitionedMemoryMngr::registerMemory(Memory* memPtr) { + auto memMngr = sourceMemMngr(); + memMngr->registerMemory(memPtr); +} + +void PartitionedMemoryMngr::unregisterMemory(Memory* memPtr) { + auto memMngr = sourceMemMngr(); + memMngr->unregisterMemory(memPtr); +} + diff --git a/src/plugins/intel_cpu/src/partitioned_mem_mgr.h b/src/plugins/intel_cpu/src/partitioned_mem_mgr.h new file mode 100644 index 00000000000000..58eae53d1209a1 --- /dev/null +++ b/src/plugins/intel_cpu/src/partitioned_mem_mgr.h @@ -0,0 +1,36 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "edge.h" + +namespace ov { +namespace intel_cpu { + +class PartitionedMemoryMngr : public IMemoryMngrObserver { +public: + PartitionedMemoryMngr(EdgePtr pEdge, size_t part = 1, ptrdiff_t offset_blocks = 0) + : m_wEdge(pEdge), m_part(part), m_offset_blocks(offset_blocks) {} + + void* getRawPtr() const noexcept override; + void setExtBuff(void* ptr, size_t size) override; + bool resize(size_t size) override; + bool hasExtBuffer() const noexcept override; + void registerMemory(Memory* memPtr) override; + void unregisterMemory(Memory* memPtr) override; + +private: + MemoryMngrPtr sourceMemMngr() const; + MemoryMngrPtr sourceMemMngrNoThrow() const noexcept; + +private: + EdgeWeakPtr m_wEdge; + size_t m_part = 1; // the size of the block as a fraction of the reference memory size + ptrdiff_t m_offset_blocks = 0; // offset from the reference memory beginning in blocks + size_t m_size = 0; // self size in bytes +}; + +} // namespace intel_cpu +} // namespace ov \ No newline at end of file From 329b5923afdc3dcae907c51748fd41b49ebc2018 Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Fri, 12 May 2023 17:00:39 +0200 Subject: [PATCH 03/28] Concat reshape pattern has been enabled --- src/plugins/intel_cpu/src/nodes/concat.cpp | 75 ++++++--- src/plugins/intel_cpu/src/nodes/concat.h | 1 + src/plugins/intel_cpu/src/nodes/reshape.cpp | 2 +- .../src/concat_reshape_concat.cpp | 147 ++++++++++++++++++ 4 files changed, 200 insertions(+), 25 deletions(-) create mode 100644 src/plugins/intel_cpu/tests/functional/subgraph_tests/src/concat_reshape_concat.cpp diff --git a/src/plugins/intel_cpu/src/nodes/concat.cpp b/src/plugins/intel_cpu/src/nodes/concat.cpp index 6c75ad082c564e..b9e79bf20f22ab 100644 --- a/src/plugins/intel_cpu/src/nodes/concat.cpp +++ b/src/plugins/intel_cpu/src/nodes/concat.cpp @@ -22,6 +22,7 @@ #include "common/cpu_memcpy.h" #include "common/blocked_desc_creator.h" #include 
+#include using namespace dnnl; using namespace InferenceEngine; @@ -87,12 +88,11 @@ void Concat::getSupportedDescriptors() { } // we need the first dims before axis to be 1 to avoid the reorder in the edge between the first parent and this concat - // TODO [DS]: inplace - if (!isDynamicNode()) { - const auto& childDims = outputShapes[0].getStaticDims(); - if (std::all_of(childDims.begin(), childDims.begin() + axis, [](size_t dim) { return dim == 1; })) - canBeInPlace = true; - } + + const auto& childDims = outputShapes[0].getDims(); + if (childDims[axis] != Shape::UNDEFINED_DIM && + std::all_of(childDims.begin(), childDims.begin() + axis, [](size_t dim) { return dim == 1; })) + canBeInPlace = true; } void Concat::initSupportedPrimitiveDescriptors() { @@ -179,7 +179,6 @@ void Concat::initSupportedPrimitiveDescriptors() { } } - // TODO [DS]: inplace if (!canBeInPlace || std::any_of(inputShapes.begin(), inputShapes.end(), [](const Shape& shape) { return shape.hasZeroDims(); })) return; @@ -189,33 +188,35 @@ void Concat::initSupportedPrimitiveDescriptors() { auto config = refConfig; auto denseOutDesc = refConfig.outConfs[0].getMemDesc()->as(); - const auto &order = denseOutDesc->getOrder(); + // const auto &order = denseOutDesc->getOrder(); const auto &blkDims = denseOutDesc->getBlockDims(); auto numOfDim = blkDims.size(); SizeVector offsets(numOfDim, 0lu); - SizeVector strides(numOfDim); - strides.back() = 1lu; - size_t offset = Shape::UNDEFINED_DIM; - BlockedMemoryDesc::CmpMask mask = BLOCKED_DESC_SKIP_OFFSET_MASK; // any offset - - for (size_t i = 2; i <= numOfDim; i++) { - if (numOfDim - i < axis) { - strides[numOfDim - i] = Shape::UNDEFINED_DIM; - mask.reset(numOfDim - i); // any strides on certain axis - } else { - strides[numOfDim - i] = strides[numOfDim - i + 1] * blkDims[numOfDim - i + 1]; - } - } + // SizeVector strides(numOfDim); + // strides.back() = 1lu; + // size_t offset = Shape::UNDEFINED_DIM; + //BlockedMemoryDesc::CmpMask mask = BLOCKED_DESC_SKIP_OFFSET_MASK; // any offset + BlockedMemoryDesc::CmpMask mask = BLOCKED_DESC_FULL_MASK; + + // for (size_t i = 2; i <= numOfDim; i++) { + // if (numOfDim - i < axis) { + // strides[numOfDim - i] = Shape::UNDEFINED_DIM; + // mask.reset(numOfDim - i); // any strides on certain axis + // } else { + // strides[numOfDim - i] = strides[numOfDim - i + 1] * blkDims[numOfDim - i + 1]; + // } + // } const auto outDesc = std::dynamic_pointer_cast(refConfig.outConfs[0].getMemDesc()); config.outConfs[0].setMemDesc(outDesc, mask); for (size_t i = 0; i < getParentEdges().size(); i++) { - const auto& srcBlkDims = refConfig.inConfs[i].getMemDesc()->as()->getBlockDims(); - const auto& shape = refConfig.inConfs[i].getMemDesc()->getShape(); + //const auto& srcBlkDims = refConfig.inConfs[i].getMemDesc()->as()->getBlockDims(); + // const auto& shape = refConfig.inConfs[i].getMemDesc()->getShape(); - const auto inDesc = std::make_shared(inputPrecision, shape, srcBlkDims, order, offset, offsets, strides); + // const auto inDesc = std::make_shared(inputPrecision, shape, srcBlkDims, order, offset, offsets, strides); + auto inDesc = std::dynamic_pointer_cast(refConfig.inConfs[i].getMemDesc()); config.inConfs[i].inPlace(0); config.inConfs[i].setMemDesc(inDesc, mask); @@ -723,6 +724,32 @@ void Concat::execRef() { } } +void Concat::resolveInPlaceEdges() { + if (isOptimized()) { + auto selected_pd = getSelectedPrimitiveDescriptor(); + if (selected_pd == nullptr) + IE_THROW() << "Preferable primitive descriptor is not set."; + auto& config = 
selected_pd->getConfig(); + size_t numberOfInputs = config.inConfs.size(); + size_t inplaceOutIndx = selected_pd->getConfig().inConfs[0].inPlace(); + auto childEdge = getChildEdgesAtPort(inplaceOutIndx).front(); + for (size_t i = 0; i < numberOfInputs; ++i) { + auto parentEdge = getParentEdgeAt(i); + + // IE_ASSERT(parentEdge->getStatus() == Edge::Status::NotAllocated) << "Unexpected edge status in node: " << + // getName() << " with type " << getTypeStr(); + + auto memMgr = std::make_shared(childEdge, numberOfInputs, i); + parentEdge->getMemoryPtr().reset(new Memory(getEngine())); + parentEdge->getMemoryPtr()->Create(selected_pd->getConfig().inConfs[i].getMemDesc(), memMgr); + + parentEdge->changeStatus(Edge::Status::Allocated); + } + } else { + Node::resolveInPlaceEdges(); + } +} + } // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/concat.h b/src/plugins/intel_cpu/src/nodes/concat.h index 32831bcede332a..973e71ae2df230 100644 --- a/src/plugins/intel_cpu/src/nodes/concat.h +++ b/src/plugins/intel_cpu/src/nodes/concat.h @@ -26,6 +26,7 @@ class Concat : public Node { bool created() const override; void execute(dnnl::stream strm) override; void executeDynamicImpl(dnnl::stream strm) override { execute(strm); } + void resolveInPlaceEdges() override; bool isOptimized() const; diff --git a/src/plugins/intel_cpu/src/nodes/reshape.cpp b/src/plugins/intel_cpu/src/nodes/reshape.cpp index 095bbfcb6614fc..d8a8fe67414a16 100644 --- a/src/plugins/intel_cpu/src/nodes/reshape.cpp +++ b/src/plugins/intel_cpu/src/nodes/reshape.cpp @@ -306,7 +306,7 @@ void Reshape::initSupportedPrimitiveDescriptors() { config.inConfs.resize(getParentEdges().size()); auto& creatorsMap = BlockedDescCreator::getCommonCreators(); for (size_t i = 0; i < getParentEdges().size(); i++) { - config.inConfs[i].inPlace(-1); + config.inConfs[i].inPlace(0 == i && canBeInPlace ? 0 : -1); config.inConfs[i].constant(false); config.inConfs[i].setMemDesc(creatorsMap.at(LayoutType::ncsp)->createSharedDesc((i > 0 ? secondInPrc : inPrec), getInputShapeAtPort(i))); } diff --git a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/concat_reshape_concat.cpp b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/concat_reshape_concat.cpp new file mode 100644 index 00000000000000..68f67597da8703 --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/concat_reshape_concat.cpp @@ -0,0 +1,147 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "shared_test_classes/base/ov_subgraph.hpp" +#include "ngraph_functions/utils/ngraph_helpers.hpp" +#include "ngraph_functions/builders.hpp" + + +/*This test runs the following subgraph: + + param1 param2 param3 param4 + | | | | + | | | | + Softmax Softmax Softmax Softmax + | | | | + | | | | + Reshape Reshape Reshape Reshape + | | | | + | | | | + \ / \ / + \ / \ / + \ / \ / + Concat Concat + | | + | | + Reshape Reshape + | | + \ / + \ / + \ / + Concat + | + Softmax + + Result + + The main purpose of this test is checking the code path when all the nodes except Softmax use "in-place" memory mode. + Softmax is used as a model of an arbitrary subgraph preceding the pattern. 
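   Concretely, with the changes in this series the Concat nodes are expected to be resolved in place: each Concat
   input is mapped onto a PartitionedMemoryMngr view of the Concat output buffer (a fraction of it at the
   corresponding offset), and the Reshape nodes forward their input memory as well, so the upstream Softmax
   results should be written directly into the slices of the final Concat output without intermediate copies.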
+*/ + +using namespace InferenceEngine; +using namespace ov::test; + +namespace SubgraphTestsDefinitions { + +using VectorShapes = std::vector; + +class ConcatReshapeConcatSubgraphTest : public testing::WithParamInterface, + virtual public SubgraphBaseTest { +public: + static std::string getTestCaseName(testing::TestParamInfo obj) { + VectorShapes& inputShapes = obj.param; + + std::ostringstream result; + result << "IS="; + for (const auto& shape : inputShapes) { + result << CommonTestUtils::partialShape2str({shape.first}) << "_"; + } + result << "TS="; + for (const auto& shape : inputShapes) { + result << "("; + if (!shape.second.empty()) { + for (const auto& itr : shape.second) { + result << CommonTestUtils::vec2str(itr); + } + } + result << ")"; + } + return result.str(); + } + + void SetUp() override { + constexpr size_t number_of_params = 4ul; + constexpr size_t softmax_axis = 1ul; + constexpr int concat_axis = 0; + targetDevice = CommonTestUtils::DEVICE_CPU; + auto netPrc = ov::element::f32; + auto& InputShapes = this->GetParam(); + ASSERT_EQ(InputShapes.size(), number_of_params) << "Unexpected number of input shapes"; + init_input_shapes(InputShapes); + auto input_params = ngraph::builder::makeDynamicParams(netPrc, inputDynamicShapes); + + ov::NodeVector first_level_reshapes; + + for (size_t i = 0; i < number_of_params; ++i) { + auto soft_max = std::make_shared(input_params[i], softmax_axis); + auto reshape_param = ngraph::builder::makeConstant(ov::element::i32, {1}, {0}); + auto reshape = std::make_shared(soft_max, reshape_param); + first_level_reshapes.push_back(reshape); + } + + auto concat1 = std::make_shared(ov::NodeVector{first_level_reshapes[0], first_level_reshapes[1]}, concat_axis); + auto concat2 = std::make_shared(ov::NodeVector{first_level_reshapes[2], first_level_reshapes[3]}, concat_axis); + + ov::NodeVector second_level_reshapes; + ov::NodeVector first_level_concats = {concat1, concat2}; + + for (size_t i = 0; i < number_of_params / 2; ++i) { + auto reshape_param = ngraph::builder::makeConstant(ov::element::i32, {1}, {0}); + auto reshape = std::make_shared(first_level_concats[i], reshape_param); + second_level_reshapes.push_back(reshape); + } + + auto concat3 = std::make_shared(second_level_reshapes, concat_axis); + auto soft_max = std::make_shared(concat3, softmax_axis); + + ngraph::ResultVector results; + for (int i = 0; i < soft_max->get_output_size(); i++) + results.push_back(std::make_shared(soft_max->output(i))); + + function = std::make_shared(results, input_params, "ConcatReshapeConcatPattern"); + ov::pass::Serialize serializer("ngraph.xml", "ngraph.bin"); + serializer.run_on_model(function); + } +}; + +TEST_P(ConcatReshapeConcatSubgraphTest, CompareWithRefs) { + run(); + ov::pass::Serialize serializer("exec_graph_dyn.xml", "exec_graph_dyn.bin"); + serializer.run_on_model(std::const_pointer_cast(compiledModel.get_runtime_model())); +} + +namespace { + +const std::vector> inputShapes = { + // { + // // {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} + // {{2, 64}, {{2, 64}}}, // input 0 + // {{2, 64}, {{2, 64}}}, // input 1 + // {{2, 64}, {{2, 64}}}, // input 2 + // {{2, 64}, {{2, 64}}} // input 3 + // }, + { + // {{dynamic shape}, {{static shape case1}, {static shape case2}, ...} + {{2, -1}, {{2, 64}}}, // input 0 + {{2, -1}, {{2, 64}}}, // input 1 + {{2, -1}, {{2, 64}}}, // input 2 + {{2, -1}, {{2, 64}}} // input 3 + }, +}; + +INSTANTIATE_TEST_SUITE_P(smoke_Concat_Reshape_Concat, ConcatReshapeConcatSubgraphTest, + 
::testing::ValuesIn(inputShapes), + ConcatReshapeConcatSubgraphTest::getTestCaseName); +} // namespace +} // namespace SubgraphTestsDefinitions \ No newline at end of file From a1b63153867e3a36c21017e83e6f8593dfb89f18 Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Thu, 25 May 2023 19:02:24 +0200 Subject: [PATCH 04/28] Enhanced in place conflicts detection --- src/plugins/intel_cpu/src/edge.cpp | 103 ++++++++-------------------- src/plugins/intel_cpu/src/edge.h | 2 +- src/plugins/intel_cpu/src/graph.cpp | 78 +++++++++++++++++++++ src/plugins/intel_cpu/src/node.cpp | 23 ++++++- src/plugins/intel_cpu/src/node.h | 4 +- 5 files changed, 130 insertions(+), 80 deletions(-) diff --git a/src/plugins/intel_cpu/src/edge.cpp b/src/plugins/intel_cpu/src/edge.cpp index 21fff04b955b08..57e3c25f1692fc 100644 --- a/src/plugins/intel_cpu/src/edge.cpp +++ b/src/plugins/intel_cpu/src/edge.cpp @@ -97,63 +97,21 @@ bool Edge::enforceReorder() { } } - auto childCanModifyMem = [](const Edge& edge) { - bool result = false; - int outNumber = edge.getOutputNum(); - if (auto childSPD = edge.getChild()->getSelectedPrimitiveDescriptor()) { - result = childSPD->getConfig().outConfs.empty(); - for (const auto& conf : childSPD->getConfig().outConfs) { - if (outNumber >= 0 && conf.inPlace() == outNumber && edge.getChild()->isExecutable()) - return true; - } - } - return result; - }; - - const auto& detectInPlaceChildrenNum = [&childCanModifyMem](const std::vector& edges) -> size_t { - size_t count = 0; - for (const auto& edge : edges) { - if (childCanModifyMem(*edge)) { - count++; - } - } - return count; - }; - int inNumber = getInputNum(); - const auto portChildEdges = parentNode->getChildEdgesAtPort(inNumber); - if (childCanModifyMem(*this) && portChildEdges.size() > 1) { - if (childNode->getType() == Type::Convolution) { - auto execIndex = childNode->getExecIndex(); - for (auto pEdgePeer : portChildEdges) { - if (pEdgePeer.get() == this) - continue; - std::vector vecConsumers; - pEdgePeer->collectConsumers(vecConsumers); - for (auto node : vecConsumers) { - if (node->getExecIndex() >= execIndex) { - return true; - } + if (portChildEdges.size() > 1) { + if (in_place) { + for (auto& p_edge_peer : portChildEdges) { + if (p_edge_peer.get() == this) + continue; + if (p_edge_peer->inPlace(LOOK_DOWN)) { //p_edge_peer->getChild()->getType() != Type::Reorder && + return true; } } - } else if (in_place && detectInPlaceChildrenNum(portChildEdges) > 1) { - return true; } } - // if (!canBeInPlaceConflicts && in_place && !parentNode->getChildEdges().empty()) { - // for (auto& p_edge_peer : portChildEdges) { - // if (p_edge_peer.get() == this) - // continue; - // if (p_edge_peer->getChild()->getType() != Type::Reorder && p_edge_peer->inPlace(LOOK_DOWN)) { - // canBeInPlaceConflicts = true; - // break; - // } - // } - // } - // In case the parent node is an input constant, the memory is unaligned and the child primitive isa is SSE, // we have to insert reorder since the vast majority of arithmetic and data processing instructions in legacy SSE isa requires // the memory address in the operands must be aligned on 16-byte boundary. 
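The alignment rule mentioned in the comment above reduces to a plain address check; the helper below is only an illustration of that check and is not part of the patch (the name isAligned16 is made up here).

    // Illustration only: legacy SSE loads/stores such as movaps fault on addresses
    // that are not 16-byte aligned, hence the forced reorder for unaligned constant inputs.
    #include <cstdint>

    inline bool isAligned16(const void* ptr) {
        return reinterpret_cast<std::uintptr_t>(ptr) % 16 == 0;
    }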
@@ -525,22 +483,22 @@ void Edge::init() { } sharedMemFrom(edgePtr); } - - auto port = getInputNum(); - if (port < 0) - return; - auto edges_at_same_port = getParent()->getChildEdgesAtPort(static_cast(port)); - for (auto edge : edges_at_same_port) { - if (edge->getStatus() != Status::NeedAllocation && edge->getStatus() != Status::Uninitialized) { - if (edge->getSharedEdge() != edgePtr) - IE_THROW() << "Unsupported behavior. Cannot mark edge " - << getParent()->getChildEdgeAt(0)->getParent()->getName() << "->" - << getParent()->getChildEdgeAt(0)->getChild()->getName() << " as not allocated!"; - } else { - if (edge != edgePtr) - edge->sharedMemFrom(edgePtr); - } - } +// + // auto port = getInputNum(); + // if (port < 0) + // return; + // auto edges_at_same_port = getParent()->getChildEdgesAtPort(static_cast(port)); + // for (auto edge : edges_at_same_port) { + // if (edge->getStatus() != Status::NeedAllocation && edge->getStatus() != Status::Uninitialized) { + // if (edge->getSharedEdge() != edgePtr) + // IE_THROW() << "Unsupported behavior. Cannot mark edge " + // << getParent()->getChildEdgeAt(0)->getParent()->getName() << "->" + // << getParent()->getChildEdgeAt(0)->getChild()->getName() << " as not allocated!"; + // } else { + // if (edge != edgePtr) + // edge->sharedMemFrom(edgePtr); + // } + // } } /** @@ -572,19 +530,16 @@ EdgePtr Edge::getBaseEdge(int look) { break; } } - return next_ch_edge->getBaseEdge(LOOK_DOWN); + return next_ch_edge; } else if (parentInPlacePort >= 0 && (look & LOOK_UP)) { - return getParent()->getParentEdgesAtPort(parentInPlacePort)[0]->getBaseEdge(LOOK_UP); + return getParent()->getParentEdgesAtPort(parentInPlacePort)[0]; } auto edgesForSamePort = getParent()->getChildEdgesAtPort(inputNum); - if (!(look & LOOK_NO_RECURRENT)) { - for (auto edge : edgesForSamePort) { - if (edge.get() != this) { - auto base = edge->getBaseEdge(LOOK_BOTH | LOOK_NO_RECURRENT); - // Return once found the first inplace consumer - if (base != edge && base != edgesForSamePort[0]) return base; - } + for (auto edge : edgesForSamePort) { + if (edge.get() != this) { + // Return once found the first inplace consumer + if (edge->inPlace() && edge != edgesForSamePort[0]) return edge; } } return edgesForSamePort[0]; diff --git a/src/plugins/intel_cpu/src/edge.h b/src/plugins/intel_cpu/src/edge.h index d1a8d227dd3fc3..8647ea3f779f3a 100644 --- a/src/plugins/intel_cpu/src/edge.h +++ b/src/plugins/intel_cpu/src/edge.h @@ -103,7 +103,7 @@ class Edge { void collectConsumers(std::vector>& result) const; - enum LOOK { LOOK_UP = 1, LOOK_DOWN = 2, LOOK_BOTH = LOOK_UP | LOOK_DOWN, LOOK_NO_RECURRENT = 4 }; + enum LOOK { LOOK_UP = 1, LOOK_DOWN = 2, LOOK_BOTH = LOOK_UP | LOOK_DOWN }; EdgePtr getBaseEdge(int look = LOOK_BOTH); bool inPlace(LOOK look = LOOK_BOTH) const; diff --git a/src/plugins/intel_cpu/src/graph.cpp b/src/plugins/intel_cpu/src/graph.cpp index 9e02f2be456910..bc3787336a7c7f 100644 --- a/src/plugins/intel_cpu/src/graph.cpp +++ b/src/plugins/intel_cpu/src/graph.cpp @@ -633,6 +633,84 @@ void Graph::InitEdges() { updateEdge(i); } } + + // secondary pass to eliminate complex implace conflicts + std::function findNodeModifyingMemory; + findNodeModifyingMemory = [&findNodeModifyingMemory](const EdgePtr& edge) -> NodePtr { + auto childNode = edge->getChild(); + if (childNode && childNode->isInPlace()) { + // check if the children nodes are able to modify the memory + auto childPort = edge->getOutputNum(); + auto inPlaceInputPort = childNode->inPlaceInputPort(childPort); + if (inPlaceInputPort >= 0) 
{ + if (childNode->isExecutable()) { + // Node can modify the memory + return childNode; + } + for (auto&& edge : childNode->getChildEdgesAtPort(inPlaceInputPort)) { + // continue searching + if (auto result = findNodeModifyingMemory(edge)) { + return result; + } + } + } + // check backward dependency + if (auto childSPD = childNode->getSelectedPrimitiveDescriptor()) { + auto& outConfs = childSPD->getConfig().outConfs; + for (size_t i = 0; i < outConfs.size(); ++i) { + const auto& conf = outConfs[i]; + if (childPort >= 0 && conf.inPlace() == childPort) { + if (childNode->isExecutable()) { + // Node can modify the memory + return childNode; + } + for (auto&& edge : childNode->getChildEdgesAtPort(i)) { + // continue searching + if (auto result = findNodeModifyingMemory(edge)) { + return result; + } + } + } + } + } + } + // nothing has been found + return nullptr; + }; + + auto needReorder = [&findNodeModifyingMemory](const EdgePtr& edge) -> bool { + int inNumber = edge->getInputNum(); + const auto portChildEdges = edge->getParent()->getChildEdgesAtPort(inNumber); + if (portChildEdges.size() > 1) { + if (auto modifyingNode = findNodeModifyingMemory(edge)) { + auto execIndex = modifyingNode->getExecIndex(); + for (auto pEdgePeer : portChildEdges) { + if (pEdgePeer == edge) + continue; + std::vector vecConsumers; + pEdgePeer->collectConsumers(vecConsumers); + + for (auto node : vecConsumers) { + if (node->getExecIndex() >= execIndex) { + return true; + } + } + } + } + } + return false; + }; + + numberOfEdges = graphEdges.size(); //update the total number + + for (auto i = 0; i < numberOfEdges; i++) { + auto edge = graphEdges[i]; + if (needReorder(edge)) { + constexpr bool optimizedReorder = false; + insertReorder(edge, optimizedReorder); + updateEdge(i); + } + } } static inline bool isConstOutput(EdgePtr edge) { diff --git a/src/plugins/intel_cpu/src/node.cpp b/src/plugins/intel_cpu/src/node.cpp index f9317f92584666..ee806e8867e08f 100644 --- a/src/plugins/intel_cpu/src/node.cpp +++ b/src/plugins/intel_cpu/src/node.cpp @@ -886,7 +886,7 @@ MemoryPtr Node::prepareWeightMemory(DnnlMemoryDescPtr weightDesc) { return ptr; } -bool Node::isInPlace() { +bool Node::isInPlace() const { if (inplace == InPlaceType::Unknown) { auto selected_pd = getSelectedPrimitiveDescriptor(); if (selected_pd == nullptr) @@ -1444,14 +1444,31 @@ bool Node::isInputTensorAtPortEmpty(size_t port) const { if (inputShapes.size() <= port) { IE_THROW() << "Incorrect input port number for node " << getName(); } - return getParentEdgesAtPort(port)[0]->getMemory().GetShape().hasZeroDims(); + + if (inputShapes[port].isStatic()) { + return inputShapes[port].hasZeroDims(); + } else { + auto& mem = getParentEdgesAtPort(port)[0]->getMemory(); + if (mem.isAllocated()) { + return mem.GetShape().hasZeroDims(); + } + } + return false; } bool Node::isOutputTensorAtPortEmpty(size_t port) const { if (outputShapes.size() <= port) { IE_THROW() << "Incorrect output port number for node " << getName(); } - return getChildEdgesAtPort(port)[0]->getMemory().GetShape().hasZeroDims(); + if (outputShapes[port].isStatic()) { + return outputShapes[port].hasZeroDims(); + } else { + auto& mem = getChildEdgesAtPort(port)[0]->getMemory(); + if (mem.isAllocated()) { + return mem.GetShape().hasZeroDims(); + } + } + return false; } bool Node::hasEmptyInputTensors() const { diff --git a/src/plugins/intel_cpu/src/node.h b/src/plugins/intel_cpu/src/node.h index 38b70e2cf04b05..625699a92be726 100644 --- a/src/plugins/intel_cpu/src/node.h +++ 
b/src/plugins/intel_cpu/src/node.h @@ -203,7 +203,7 @@ class Node { return engine; } - bool isInPlace(); + bool isInPlace() const; // must be called only after Graph::InitEdges() virtual bool isExecutable() const { @@ -601,7 +601,7 @@ class Node { Const, NoConst }; - InPlaceType inplace = InPlaceType::Unknown; + mutable InPlaceType inplace = InPlaceType::Unknown; ConstantType constant = ConstantType::Unknown; std::vector internalBlobs; std::vector internalBlobMemory; From 8f9cadb4a183ae46e757794a3f4b0f1b23e56701 Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Thu, 25 May 2023 19:04:51 +0200 Subject: [PATCH 05/28] Refactor Concat --- src/plugins/intel_cpu/src/infer_request.cpp | 9 +- src/plugins/intel_cpu/src/nodes/concat.cpp | 121 ++------------------ src/plugins/intel_cpu/src/nodes/concat.h | 2 - 3 files changed, 14 insertions(+), 118 deletions(-) diff --git a/src/plugins/intel_cpu/src/infer_request.cpp b/src/plugins/intel_cpu/src/infer_request.cpp index 82a7ad8b16c5a5..fc9757447330c7 100644 --- a/src/plugins/intel_cpu/src/infer_request.cpp +++ b/src/plugins/intel_cpu/src/infer_request.cpp @@ -220,12 +220,9 @@ void InferRequestBase::changeDefaultPtr() { break; } - if (child->getType() == Type::Concatenation) { - auto concat = dynamic_cast(child.get()); - if (concat && concat->isOptimized()) { - canBeInPlace = false; - break; - } + if (child->getType() == Type::Concatenation && child->isInPlace()) { + canBeInPlace = false; + break; } // Cannot be in-place before split because split is using different ptrs without offsets diff --git a/src/plugins/intel_cpu/src/nodes/concat.cpp b/src/plugins/intel_cpu/src/nodes/concat.cpp index b9e79bf20f22ab..4b0bb67eace259 100644 --- a/src/plugins/intel_cpu/src/nodes/concat.cpp +++ b/src/plugins/intel_cpu/src/nodes/concat.cpp @@ -34,7 +34,7 @@ namespace { } bool Concat::isExecutable() const { - return !hasEmptyOutputTensors() && !isOptimized(); + return !isInPlace() && !hasEmptyOutputTensors(); } bool Concat::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { @@ -119,7 +119,8 @@ void Concat::initSupportedPrimitiveDescriptors() { const auto& dstShape = getOutputShapeAtPort(0); std::vector tdCreatorTypes = {LayoutType::ncsp, LayoutType::nspc}; - // check if blocked layouts are available the channels size should be evenly divided by the block size to avoid slow oneDNN ref implementation + // check if blocked layouts are available the channels size should be evenly divided by the block size to avoid slow oneDNN ref implementation and allow + // inPlace memory usage if possible if (dstShape.getRank() > channelAxis) { for (auto& item : { std::make_pair(8lu, LayoutType::nCsp8c), std::make_pair(16lu, LayoutType::nCsp16c)}) { const VectorDims &blkDims = dstShape.getDims(); @@ -159,12 +160,7 @@ void Concat::initSupportedPrimitiveDescriptors() { config.inConfs[i].inPlace(-1); config.inConfs[i].constant(false); auto desc = itr->second->createSharedDesc(inputPrecision, getInputShapeAtPort(i)); - // TODO [DS]: inplace - if (isDynamicNode()) { - config.inConfs[i].setMemDesc(desc); - } else { - config.inConfs[i].setMemDesc(desc, BLOCKED_DESC_EMPTY_MASK); - } + config.inConfs[i].setMemDesc(desc); } supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref); if (itr->first != LayoutType::nspc) { @@ -184,42 +180,9 @@ void Concat::initSupportedPrimitiveDescriptors() { // Optimized inplace case for (auto refPdIndex : pdIndexesToReuse) { - const auto& refConfig = supportedPrimitiveDescriptors[refPdIndex].getConfig(); - auto 
config = refConfig; - - auto denseOutDesc = refConfig.outConfs[0].getMemDesc()->as(); - // const auto &order = denseOutDesc->getOrder(); - const auto &blkDims = denseOutDesc->getBlockDims(); - auto numOfDim = blkDims.size(); - - SizeVector offsets(numOfDim, 0lu); - // SizeVector strides(numOfDim); - // strides.back() = 1lu; - // size_t offset = Shape::UNDEFINED_DIM; - //BlockedMemoryDesc::CmpMask mask = BLOCKED_DESC_SKIP_OFFSET_MASK; // any offset - BlockedMemoryDesc::CmpMask mask = BLOCKED_DESC_FULL_MASK; - - // for (size_t i = 2; i <= numOfDim; i++) { - // if (numOfDim - i < axis) { - // strides[numOfDim - i] = Shape::UNDEFINED_DIM; - // mask.reset(numOfDim - i); // any strides on certain axis - // } else { - // strides[numOfDim - i] = strides[numOfDim - i + 1] * blkDims[numOfDim - i + 1]; - // } - // } - - const auto outDesc = std::dynamic_pointer_cast(refConfig.outConfs[0].getMemDesc()); - config.outConfs[0].setMemDesc(outDesc, mask); - - for (size_t i = 0; i < getParentEdges().size(); i++) { - //const auto& srcBlkDims = refConfig.inConfs[i].getMemDesc()->as()->getBlockDims(); - // const auto& shape = refConfig.inConfs[i].getMemDesc()->getShape(); - - // const auto inDesc = std::make_shared(inputPrecision, shape, srcBlkDims, order, offset, offsets, strides); - auto inDesc = std::dynamic_pointer_cast(refConfig.inConfs[i].getMemDesc()); - + auto config = supportedPrimitiveDescriptors[refPdIndex].getConfig();; + for (size_t i = 0; i < config.inConfs.size(); i++) { config.inConfs[i].inPlace(0); - config.inConfs[i].setMemDesc(inDesc, mask); } supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown); } @@ -345,19 +308,15 @@ bool Concat::created() const { return getType() == Type::Concatenation; } -bool Concat::isOptimized() const { - return getSelectedPrimitiveDescriptor() && getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].inPlace() >= 0; -} - bool Concat::needPrepareParams() const { - if (canOptimizeNspc) { + if (canOptimizeNspc || isInPlace()) { return false; } return inputShapesModified(); } void Concat::prepareParams() { - if (canOptimizeNspc || isOptimized()) + if (canOptimizeNspc || isInPlace()) return; const auto& dstMemPtr = getChildEdgesAtPort(0)[0]->getMemoryPtr(); @@ -456,7 +415,7 @@ void Concat::initOptimalPrimitiveDescriptor() { if (selected_pd == nullptr) IE_THROW() << "Preferable primitive descriptor is not set."; - if (!isOptimized()) { + if (!isInPlace()) { Node::initOptimalPrimitiveDescriptor(); auto config = selected_pd->getConfig(); if (!isConfigDefined(config)) { @@ -473,64 +432,6 @@ void Concat::initOptimalPrimitiveDescriptor() { } } - auto config = selected_pd->getConfig(); - if (!isDynamicNode() && !isConfigDefined(config)) { - for (size_t i = 0; i < config.outConfs.size(); i++) { - int num = getChildEdgeAt(i)->getOutputNum(); - if (num >= 0) { - auto childConf = getChildEdgeAt(i)->getChild()->getSelectedPrimitiveDescriptor()->getConfig().inConfs[num]; - childConf.setMemDesc(childConf.getMemDesc()->cloneWithNewPrecision(config.outConfs[i].getMemDesc()->getPrecision())); - - if (getChildEdgeAt(i)->getChild()->getSelectedPrimitiveDescriptor()) { - if (!childConf.getMemDesc()->isDefined() && childConf.inPlace() >= 0) - getChildEdgeAt(i)->getChild()->initOptimalPrimitiveDescriptor(); - - if (childConf.getMemDesc()->isDefined() && config.outConfs[i].getPortDesc()->isCompatible(*childConf.getPortDesc())) { - config.outConfs[i].setMemDesc(childConf.getMemDesc()); - continue; - } - } - } - - // reset mask - 
config.outConfs[i].setMemDesc(config.outConfs[i].getMemDesc()); - } - auto firstOutBlockingDesc = config.outConfs[0].getMemDesc()->as(); - size_t offset = 0; - for (size_t i = 0; i < config.inConfs.size(); i++) { - auto oldDesc = config.inConfs[i].getMemDesc(); - auto inpBlockingDesc = oldDesc->as(); - - config.inConfs[i].setMemDesc( - std::make_shared( - inpBlockingDesc->getPrecision(), - inpBlockingDesc->getShape(), - inpBlockingDesc->getBlockDims(), - inpBlockingDesc->getOrder(), - firstOutBlockingDesc->getOffsetPadding() + offset, - firstOutBlockingDesc->getOffsetPaddingToData(), - firstOutBlockingDesc->getStrides()), - BLOCKED_DESC_FULL_MASK); - size_t axisSize = 1; - - auto firstInpBlockingDesc = config.inConfs[0].getMemDesc()->as(); - if (firstInpBlockingDesc->hasLayoutType(LayoutType::nspc)) { - // This is more general and works for any "direct" Layout (such as nchw or nhwc), but it doesn't work for blocked - size_t realAxis = inverseOrder(firstInpBlockingDesc->getOrder(), axis); - for (size_t j = realAxis; j < inpBlockingDesc->getBlockDims().size(); j++) { - size_t jj = firstInpBlockingDesc->getOrder()[j]; - axisSize *= inpBlockingDesc->getBlockDims()[jj]; - } - } else { - // This works for nchw and nchw8c/nchw16c - for (size_t j = axis; j < inpBlockingDesc->getBlockDims().size(); j++) { - axisSize *= inpBlockingDesc->getBlockDims()[j]; - } - } - offset += axisSize; - } - initDescriptor(config); - } //block layout may have axis greater than rank, disable ref_concat auto primDesc = getSelectedPrimitiveDescriptor(); auto memDesc = primDesc->getConfig().outConfs[0].getMemDesc()->as(); @@ -548,7 +449,7 @@ void Concat::initOptimalPrimitiveDescriptor() { } void Concat::execute(dnnl::stream strm) { - if (isOptimized()) { + if (isInPlace()) { return; } @@ -725,7 +626,7 @@ void Concat::execRef() { } void Concat::resolveInPlaceEdges() { - if (isOptimized()) { + if (isInPlace()) { auto selected_pd = getSelectedPrimitiveDescriptor(); if (selected_pd == nullptr) IE_THROW() << "Preferable primitive descriptor is not set."; diff --git a/src/plugins/intel_cpu/src/nodes/concat.h b/src/plugins/intel_cpu/src/nodes/concat.h index 973e71ae2df230..f17a706fccd737 100644 --- a/src/plugins/intel_cpu/src/nodes/concat.h +++ b/src/plugins/intel_cpu/src/nodes/concat.h @@ -28,8 +28,6 @@ class Concat : public Node { void executeDynamicImpl(dnnl::stream strm) override { execute(strm); } void resolveInPlaceEdges() override; - bool isOptimized() const; - InferenceEngine::Precision getRuntimePrecision() const override; bool isExecutable() const override; From 8529c718e7bbf14410dda13913fd7f715464b761 Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Thu, 25 May 2023 19:05:12 +0200 Subject: [PATCH 06/28] Fix Reshape isExecutable call --- src/plugins/intel_cpu/src/nodes/reshape.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/plugins/intel_cpu/src/nodes/reshape.cpp b/src/plugins/intel_cpu/src/nodes/reshape.cpp index d8a8fe67414a16..bc2501102482cd 100644 --- a/src/plugins/intel_cpu/src/nodes/reshape.cpp +++ b/src/plugins/intel_cpu/src/nodes/reshape.cpp @@ -334,8 +334,14 @@ void Reshape::execute(dnnl::stream strm) { } bool Reshape::isExecutable() const { - bool inPlaceEnabled = - getSelectedPrimitiveDescriptor() && getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].inPlace() >= 0; + bool inPlaceEnabled = false; + if (auto prim_desc = getSelectedPrimitiveDescriptor()) { + auto& config = prim_desc->getConfig(); + if (config.inConfs[0].inPlace() >= 0 || + 
config.outConfs[0].inPlace() >= 0) { + inPlaceEnabled = true; + } + } return !inPlaceEnabled; } From 1c2b0833748f17e923efd826d765728d03f3dd70 Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Thu, 25 May 2023 19:08:14 +0200 Subject: [PATCH 07/28] Split node refactoring --- src/plugins/intel_cpu/src/nodes/split.cpp | 139 ++++++------------ src/plugins/intel_cpu/src/nodes/split.h | 2 +- .../intel_cpu/src/partitioned_mem_mgr.cpp | 10 +- .../intel_cpu/src/partitioned_mem_mgr.h | 9 +- 4 files changed, 58 insertions(+), 102 deletions(-) diff --git a/src/plugins/intel_cpu/src/nodes/split.cpp b/src/plugins/intel_cpu/src/nodes/split.cpp index 1476c373ab276f..ab0068865efdc1 100644 --- a/src/plugins/intel_cpu/src/nodes/split.cpp +++ b/src/plugins/intel_cpu/src/nodes/split.cpp @@ -12,6 +12,7 @@ #include "utils/general_utils.h" #include #include "utils/ngraph_utils.hpp" +#include #define THROW_ERROR IE_THROW() << "Split layer with name '" << getName() <<"' " @@ -99,12 +100,11 @@ void Split::initSupportedPrimitiveDescriptors() { InferenceEngine::Precision inpPrecision = getOriginalInputPrecisionAtPort(0); const auto axisPrecision = Precision::I32; - auto outPrecision = inpPrecision; // the split layer doesn't convert precisions // Set plain and tailC formats std::vector tdCreatorTypes{ LayoutType::ncsp, LayoutType::nspc }; - // Support channel blocked format + // Support channel blocked format only if we manipulate complete blocks if (srcShape.getRank() > 2) { for (auto item : { std::make_pair(8lu, LayoutType::nCsp8c), std::make_pair(16lu, LayoutType::nCsp16c) }) { const auto &blkDims = srcShape.getDims(); @@ -163,43 +163,15 @@ void Split::initSupportedPrimitiveDescriptors() { } } - // Optimized inplace case - // TODO [DS]: inplace - if (!isDynamicNode()) { + // in place only makes sense when we split by dense blocks since strided tensors are not supported by most nodes. 
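+    // For example (illustrative shapes): splitting a [1, 1, 8, 16] input along axis 2 into
+    // [1, 1, 3, 16] and [1, 1, 5, 16] yields outputs that are contiguous slices of the parent
+    // buffer at element offsets 0 and 3 * 16, so they can be exposed as in-place views.
+    // With a non-unit leading dimension, e.g. [2, 8, 16] split along axis 1, each output would
+    // recur every 8 * 16 elements and could only be described as a strided view, hence the check below.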
+ const auto& parentdDims = inputShapes[0].getDims(); + if (parentdDims[axis] != Shape::UNDEFINED_DIM && + std::all_of(parentdDims.begin(), parentdDims.begin() + axis, [](size_t dim) { return dim == 1; })) { for (auto refPdIndex : pdIndexesToReuse) { - const auto& refConfig = supportedPrimitiveDescriptors[refPdIndex].getConfig(); - auto config = refConfig; - const auto inBlockingDesc = refConfig.inConfs[0].getMemDesc()->as(); - const auto& order = inBlockingDesc->getOrder(); - const auto& blkDims = inBlockingDesc->getBlockDims(); - auto numOfDim = blkDims.size(); - - SizeVector offsets(numOfDim, 0lu); - SizeVector strides(numOfDim); - strides.back() = 1lu; - size_t offset = Shape::UNDEFINED_DIM; - BlockedMemoryDesc::CmpMask mask = BLOCKED_DESC_SKIP_OFFSET_MASK; // accepts any offset - - for (size_t i = 2; i <= numOfDim; i++) { - if (numOfDim - i < axis) { - strides[numOfDim - i] = Shape::UNDEFINED_DIM; - mask.reset(numOfDim - i); // accepts any strides on axis - } else { - strides[numOfDim - i] = strides[numOfDim - i + 1] * blkDims[numOfDim - i + 1]; - } - } - - config.inConfs[0].setMemDesc(std::dynamic_pointer_cast(refConfig.inConfs[0].getMemDesc()), mask); - - for (size_t i = 0; i < outputShapes.size(); i++) { - auto outBlockingDesc = refConfig.outConfs[i].getMemDesc()->as(); - const auto& outBlkDims = outBlockingDesc->getBlockDims(); - const auto& shape = outBlockingDesc->getShape(); - const auto& dims = shape.getStaticDims(); + auto config = supportedPrimitiveDescriptors[refPdIndex].getConfig(); + for (size_t i = 0; i < config.outConfs.size(); i++) { config.outConfs[i].inPlace(0); - config.outConfs[i].setMemDesc(std::make_shared(outPrecision, Shape(dims), outBlkDims, order, offset, offsets, - shape.hasZeroDims() ? SizeVector(numOfDim, 0) : strides), mask); } supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown); } @@ -251,7 +223,7 @@ bool Split::needShapeInfer() const { } bool Split::needPrepareParams() const { - if (isOptimized()) { + if (isInPlace()) { return false; } return needShapeInfer(); @@ -296,11 +268,11 @@ void Split::prepareParams() { } bool Split::isExecutable() const { - return !isInputTensorAtPortEmpty(0) && !isOptimized(); + return !isInPlace() && !isInputTensorAtPortEmpty(0); } void Split::execute(dnnl::stream strm) { - if (isOptimized()) { + if (isInPlace()) { return; } @@ -323,65 +295,13 @@ bool Split::created() const { return getType() == Type::Split; } -bool Split::isOptimized() const { - return getSelectedPrimitiveDescriptor() && getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].inPlace() >= 0; -} - void Split::initOptimalPrimitiveDescriptor() { + Node::initOptimalPrimitiveDescriptor(); auto selected_pd = getSelectedPrimitiveDescriptor(); if (selected_pd == nullptr) THROW_ERROR << "Preferable primitive descriptor is not set."; - auto config = selected_pd->getConfig(); - - if (!isOptimized()) { - Node::initOptimalPrimitiveDescriptor(); - } else if (!isDynamicNode() && !isConfigDefined(config)) { - for (size_t i = 0; i < config.inConfs.size(); i++) { - int num = getParentEdgeAt(i)->getInputNum(); - if (getParentEdgeAt(i)->getParent()->getSelectedPrimitiveDescriptor()) { - if (num >= 0) { - const auto& parentConfig = getParentEdgeAt(i)->getParent()->getSelectedPrimitiveDescriptor()->getConfig().outConfs[num]; - if (!parentConfig.getMemDesc()->isDefined() && parentConfig.inPlace() >= 0) - getParentEdgeAt(i)->getParent()->initOptimalPrimitiveDescriptor(); - if (parentConfig.getMemDesc()->isDefined() && 
config.inConfs[i].getPortDesc()->isCompatible(*parentConfig.getPortDesc())) { - config.inConfs[i].setMemDesc(parentConfig.getMemDesc()); - continue; - } - } - } - // reset mask - config.inConfs[i].setMemDesc(config.inConfs[i].getMemDesc()); - } - if (config.outConfs.size() != outputShapes.size()) - THROW_ERROR << "has invalid config"; - - auto firstInBlockingDesc = config.inConfs[0].getMemDesc()->as(); - size_t offset = 0; - for (size_t i = 0; i < outputShapes.size(); i++) { - auto oldDesc = config.outConfs[i].getMemDesc(); - auto outBlockingDesc = oldDesc->as(); - const auto& shape = outBlockingDesc->getShape(); - const auto& blkDims = outBlockingDesc->getBlockDims(); - config.outConfs[i].setMemDesc(std::make_shared(outBlockingDesc->getPrecision(), - shape, - blkDims, - outBlockingDesc->getOrder(), - firstInBlockingDesc->getOffsetPadding() + offset, - firstInBlockingDesc->getOffsetPaddingToData(), - (shape.hasZeroDims() ? VectorDims(blkDims.size(), 0) : - firstInBlockingDesc->getStrides())), BLOCKED_DESC_FULL_MASK); - - size_t axisSize = 1; - for (size_t j = axis; j < outBlockingDesc->getBlockDims().size(); j++) { - axisSize *= outBlockingDesc->getBlockDims()[j]; - } - offset += axisSize; - } - initDescriptor(config); - } - - config = selected_pd->getConfig(); + auto config = selected_pd->getConfig(); canUseOptimizedNspc2Ncsp = false; IE_ASSERT(config.inConfs.size() > 0); const auto inConfDesc = config.inConfs[0].getMemDesc(); @@ -602,6 +522,39 @@ void Split::SplitOptimizedExecutor::exec(const uint8_t* srcData, const std::vect }); } +void Split::resolveInPlaceEdges() { + if (isInPlace()) { + auto selected_pd = getSelectedPrimitiveDescriptor(); + if (selected_pd == nullptr) + IE_THROW() << "Preferable primitive descriptor is not set."; + auto& config = selected_pd->getConfig(); + size_t numberOfOutputs = config.outConfs.size(); + size_t inplaceInpIndx = selected_pd->getConfig().outConfs[0].inPlace(); + auto baseDim = inputShapes.front().getDims()[axis]; + IE_ASSERT(baseDim != Shape::UNDEFINED_DIM) << "Split node: " << getName() << " can not use inPlace memory with splitting on dynamic dimention"; + auto parentEdge = getParentEdgesAtPort(inplaceInpIndx).front(); + ptrdiff_t offset = 0; + for (size_t i = 0; i < numberOfOutputs; ++i) { + auto partDim = outputShapes[i].getDims()[axis]; + IE_ASSERT(partDim != Shape::UNDEFINED_DIM) << "Split node: " << getName() << " can not use inPlace memory with splitting on dynamic dimention"; + const auto& childEdges = getChildEdgesAtPort(i); + for (auto& childEdge : childEdges) { + // IE_ASSERT(parentEdge->getStatus() == Edge::Status::NotAllocated) << "Unexpected edge status in node: " << + // getName() << " with type " << getTypeStr(); + + auto memMgr = std::make_shared(parentEdge, baseDim, offset, partDim); + childEdge->getMemoryPtr().reset(new Memory(getEngine())); + childEdge->getMemoryPtr()->Create(selected_pd->getConfig().outConfs[i].getMemDesc(), memMgr); + + childEdge->changeStatus(Edge::Status::Allocated); + } + offset += partDim; + } + } else { + Node::resolveInPlaceEdges(); + } +} + } // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/split.h b/src/plugins/intel_cpu/src/nodes/split.h index 449413439c3f7e..4d45878f4fd229 100644 --- a/src/plugins/intel_cpu/src/nodes/split.h +++ b/src/plugins/intel_cpu/src/nodes/split.h @@ -23,7 +23,6 @@ class Split : public Node { void execute(dnnl::stream strm) override; bool created() const override; - bool isOptimized() const; void 
initOptimalPrimitiveDescriptor() override; bool isExecutable() const override; @@ -32,6 +31,7 @@ class Split : public Node { bool needShapeInfer() const override; void prepareParams() override; void executeDynamicImpl(dnnl::stream strm) override { execute(strm); } + void resolveInPlaceEdges() override; private: struct SplitExecutor { diff --git a/src/plugins/intel_cpu/src/partitioned_mem_mgr.cpp b/src/plugins/intel_cpu/src/partitioned_mem_mgr.cpp index d462e3a9597234..75e4bd95b1822c 100644 --- a/src/plugins/intel_cpu/src/partitioned_mem_mgr.cpp +++ b/src/plugins/intel_cpu/src/partitioned_mem_mgr.cpp @@ -32,7 +32,7 @@ MemoryMngrPtr PartitionedMemoryMngr::sourceMemMngr() const { void* PartitionedMemoryMngr::getRawPtr() const noexcept { if (auto memMngr = sourceMemMngrNoThrow()) { - return static_cast(memMngr->getRawPtr()) + m_offset_blocks * m_size; + return static_cast(memMngr->getRawPtr()) + m_offset_blocks * m_size / m_size_blocks; } return nullptr; } @@ -45,7 +45,7 @@ void PartitionedMemoryMngr::setExtBuff(void* ptr, size_t size) { bool PartitionedMemoryMngr::resize(size_t size) { auto memMngr = sourceMemMngr(); m_size = size; - return memMngr->resize(size * m_part); + return memMngr->resize(m_size * m_total_blocks / m_size_blocks); } bool PartitionedMemoryMngr::hasExtBuffer() const noexcept { @@ -61,7 +61,9 @@ void PartitionedMemoryMngr::registerMemory(Memory* memPtr) { } void PartitionedMemoryMngr::unregisterMemory(Memory* memPtr) { - auto memMngr = sourceMemMngr(); - memMngr->unregisterMemory(memPtr); + if (!m_wEdge.expired()) { + auto memMngr = sourceMemMngr(); + memMngr->unregisterMemory(memPtr); + } } diff --git a/src/plugins/intel_cpu/src/partitioned_mem_mgr.h b/src/plugins/intel_cpu/src/partitioned_mem_mgr.h index 58eae53d1209a1..bfdcb9dabccebd 100644 --- a/src/plugins/intel_cpu/src/partitioned_mem_mgr.h +++ b/src/plugins/intel_cpu/src/partitioned_mem_mgr.h @@ -11,8 +11,8 @@ namespace intel_cpu { class PartitionedMemoryMngr : public IMemoryMngrObserver { public: - PartitionedMemoryMngr(EdgePtr pEdge, size_t part = 1, ptrdiff_t offset_blocks = 0) - : m_wEdge(pEdge), m_part(part), m_offset_blocks(offset_blocks) {} + PartitionedMemoryMngr(EdgePtr pEdge, size_t total_blocks = 1, ptrdiff_t offset_blocks = 0, size_t size_blocks = 1) + : m_wEdge(pEdge), m_total_blocks(total_blocks), m_offset_blocks(offset_blocks), m_size_blocks(size_blocks) {} void* getRawPtr() const noexcept override; void setExtBuff(void* ptr, size_t size) override; @@ -27,8 +27,9 @@ class PartitionedMemoryMngr : public IMemoryMngrObserver { private: EdgeWeakPtr m_wEdge; - size_t m_part = 1; // the size of the block as a fraction of the reference memory size - ptrdiff_t m_offset_blocks = 0; // offset from the reference memory beginning in blocks + size_t m_total_blocks = 1; // size of the parent memory in blocks + ptrdiff_t m_offset_blocks = 0; // offset from the base pointer in blocks + size_t m_size_blocks = 1; // size of the partition in blocks size_t m_size = 0; // self size in bytes }; From e46bdfc91f1bf09a12898c037d66c659e90a36a0 Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Thu, 25 May 2023 19:10:13 +0200 Subject: [PATCH 08/28] Gather node inPlace special case --- src/plugins/intel_cpu/src/nodes/gather.cpp | 61 ++++++++++++++++++++++ src/plugins/intel_cpu/src/nodes/gather.h | 3 ++ 2 files changed, 64 insertions(+) diff --git a/src/plugins/intel_cpu/src/nodes/gather.cpp b/src/plugins/intel_cpu/src/nodes/gather.cpp index 16982f80ab065a..1ac5392fc7985f 100644 --- a/src/plugins/intel_cpu/src/nodes/gather.cpp 
+++ b/src/plugins/intel_cpu/src/nodes/gather.cpp @@ -12,6 +12,7 @@ #include #include "kernels/x64/gather_uni_kernel.hpp" #include "utils/shape_inference/shape_inference_cpu.hpp" +#include using namespace InferenceEngine; using namespace dnnl::impl::cpu; @@ -162,6 +163,10 @@ Gather::Gather(const std::shared_ptr& op, const GraphContext::CPtr con if (axis < 0 || axis >= dataSrcRank || batchDims > axis) THROW_ERROR << "has incorrect input parameter axis value: " << axis; } + + if (auto indices = ov::as_type(op->get_input_node_ptr(GATHER_INDICES))) { + constIndices = indices->cast_vector(); + } } void Gather::initSupportedPrimitiveDescriptors() { @@ -201,9 +206,28 @@ void Gather::initSupportedPrimitiveDescriptors() { {LayoutType::ncsp, Precision::I32, isAxisInputConst}}, {{LayoutType::ncsp, dataPrecision}}, ref_any); + + // Let's check for the special inPlace memory use case + // in place only makes sense when we split by dense blocks since strided tensors are not supported by most nodes + + const auto& parentdDims = inputShapes[0].getDims(); + if (isAxisInputConst && + 0 == batchDims && + 1 == constIndices.size() && + parentdDims[axis] != Shape::UNDEFINED_DIM && + std::all_of(parentdDims.begin(), parentdDims.begin() + axis, [](size_t dim) { return dim == 1; })) { + addSupportedPrimDesc({{LayoutType::ncsp, dataPrecision}, + {LayoutType::ncsp, Precision::I32}, + {LayoutType::ncsp, Precision::I32, isAxisInputConst}}, + {{LayoutType::ncsp, dataPrecision, false, GATHER_DATA}}, + unknown); + } } void Gather::createPrimitive() { + if (isInPlace()) { + return; + } #if defined(OPENVINO_ARCH_X86_64) uint64_t idxElPerVec = 1; if (!isDynamicNode()) { @@ -274,6 +298,9 @@ void Gather::createPrimitive() { } bool Gather::needPrepareParams() const { + if (isInPlace()) { + return false; + } bool result = inputShapesModified(); if (!isAxisInputConst) result = result || axis != (reinterpret_cast(getParentEdgeAt(GATHER_AXIS)->getMemoryPtr()->GetPtr()))[0]; @@ -556,6 +583,40 @@ bool Gather::created() const { return getType() == Type::Gather; } +bool Gather::isExecutable() const { + return !isInPlace() && Node::isExecutable(); +} + +void Gather::resolveInPlaceEdges() { + if (isInPlace()) { + auto selected_pd = getSelectedPrimitiveDescriptor(); + if (selected_pd == nullptr) + IE_THROW() << "Preferable primitive descriptor is not set."; + constexpr size_t outputPort = 0; + + auto& config = selected_pd->getConfig(); + size_t inplaceInpIndx = selected_pd->getConfig().outConfs[outputPort].inPlace(); + auto baseDim = inputShapes.front().getDims()[axis]; + IE_ASSERT(baseDim != Shape::UNDEFINED_DIM) << "Gather node: " << getName() << " can not use inPlace memory with splitting on dynamic dimention"; + auto parentEdge = getParentEdgesAtPort(inplaceInpIndx).front(); + auto index = constIndices.at(0); + ptrdiff_t offset = index < 0 ? 
baseDim + index : index; + const auto& childEdges = getChildEdgesAtPort(outputPort); + for (auto& childEdge : childEdges) { + // IE_ASSERT(parentEdge->getStatus() == Edge::Status::NotAllocated) << "Unexpected edge status in node: " << + // getName() << " with type " << getTypeStr(); + + auto memMgr = std::make_shared(parentEdge, baseDim, offset); + childEdge->getMemoryPtr().reset(new Memory(getEngine())); + childEdge->getMemoryPtr()->Create(config.outConfs[outputPort].getMemDesc(), memMgr); + + childEdge->changeStatus(Edge::Status::Allocated); + } + } else { + Node::resolveInPlaceEdges(); + } +} + } // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/gather.h b/src/plugins/intel_cpu/src/nodes/gather.h index d89c94f437f7c4..3d323529fca402 100644 --- a/src/plugins/intel_cpu/src/nodes/gather.h +++ b/src/plugins/intel_cpu/src/nodes/gather.h @@ -24,6 +24,8 @@ class Gather : public Node { void createPrimitive() override; void execute(dnnl::stream strm) override; bool created() const override; + bool isExecutable() const override; + void resolveInPlaceEdges() override; static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; @@ -79,6 +81,7 @@ class Gather : public Node { uint64_t totalWork = 0lu; std::vector execParamsPerThread; + std::vector constIndices; static constexpr size_t GATHER_DATA = 0; static constexpr size_t GATHER_INDICES = 1; From 97d813d6056d9fbea17d20c08d3a3744ce26fa0c Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Fri, 26 May 2023 12:53:34 +0200 Subject: [PATCH 09/28] Temporal WA to enable zero copy on Split input --- src/plugins/intel_cpu/src/infer_request.cpp | 68 +++++++++++---------- src/plugins/intel_cpu/src/infer_request.h | 2 +- 2 files changed, 37 insertions(+), 33 deletions(-) diff --git a/src/plugins/intel_cpu/src/infer_request.cpp b/src/plugins/intel_cpu/src/infer_request.cpp index fc9757447330c7..2c4c8f6f53d323 100644 --- a/src/plugins/intel_cpu/src/infer_request.cpp +++ b/src/plugins/intel_cpu/src/infer_request.cpp @@ -193,8 +193,12 @@ std::map InferRequestB return perfMap; } -static inline void changeEdgePtr(const EdgePtr &edge, void *newPtr) { - edge->getMemoryPtr()->setDataHandle(newPtr); +static inline void changeEdgePtr(const EdgePtr &edge, InferenceEngine::Blob::Ptr blob) { + auto size = blob->byteSize(); + auto& mem = edge->getMemory(); + auto memMngr = mem.getMemoryMngr(); + IE_ASSERT(memMngr); + memMngr->setExtBuff(blob->buffer(), size); } void InferRequestBase::changeDefaultPtr() { @@ -203,7 +207,7 @@ void InferRequestBase::changeDefaultPtr() { auto input = inputNodesMap.find(it.first); if (input != inputNodesMap.end()) { NodePtr inputNodePtr = input->second; - if (inputNodePtr->getChildEdgeAt(0)->getMemory().GetData() == it.second) + if (inputNodePtr->getChildEdgeAt(0)->getMemory().GetData() == it.second->buffer()) continue; auto& childEdges = inputNodePtr->getChildEdges(); // Input cannot be in-place with other primitives @@ -225,28 +229,28 @@ void InferRequestBase::changeDefaultPtr() { break; } - // Cannot be in-place before split because split is using different ptrs without offsets - if (child->getType() == Type::Split) { - canBeInPlace = false; - break; - } + // // Cannot be in-place before split because split is using different ptrs without offsets + // if (child->getType() == Type::Split) { + // canBeInPlace = false; + // break; + // } - if (child->isInPlace()) { + if (child->isInPlace() && child->getType() != Type::Split) { canBeInPlace = false; 
break; } - auto& edges = child->getChildEdges(); - for (auto& edge : edges) { - auto e = edge.lock(); - if (!e) - IE_THROW() << "Node " << child->getName() << " contains empty child edge"; + // auto& edges = child->getChildEdges(); + // for (auto& edge : edges) { + // auto e = edge.lock(); + // if (!e) + // IE_THROW() << "Node " << child->getName() << " contains empty child edge"; - if (e->getMemory().GetData() == ce->getMemory().GetData()) { - canBeInPlace = false; - break; - } - } + // if (e->getMemory().GetData() == ce->getMemory().GetData()) { + // canBeInPlace = false; + // break; + // } + // } if (!canBeInPlace) break; @@ -268,7 +272,7 @@ void InferRequestBase::changeDefaultPtr() { auto output = outputNodesMap.find(it.first); if (output != outputNodesMap.end()) { auto parentEdge = output->second->getParentEdgeAt(0); - if (parentEdge->getMemory().GetData() == it.second) + if (parentEdge->getMemory().GetData() == static_cast(it.second->buffer())) continue; bool canBeInPlace = true; @@ -370,16 +374,16 @@ void LegacyInferRequest::changeDefaultPtr() { for (auto &it : inMap) { const auto &name = it.first; auto itr = externalPtr.find(name); - if (itr != externalPtr.end() && itr->second != _inputs[name]->buffer()) { - itr->second = _inputs[name]->buffer(); + if (itr != externalPtr.end() && !(itr->second->buffer() == _inputs[name]->buffer())) { + itr->second = _inputs[name]; } } const auto &outMap = graph->outputNodesMap; for (auto &it : outMap) { const auto &name = it.first; auto itr = externalPtr.find(name); - if (itr != externalPtr.end() && itr->second != _outputs[name]->buffer()) { - itr->second = _outputs[name]->buffer(); + if (itr != externalPtr.end() && !(itr->second->buffer() == _outputs[name]->buffer())) { + itr->second = _outputs[name]; } } InferRequestBase::changeDefaultPtr(); @@ -446,7 +450,7 @@ void LegacyInferRequest::SetBlob(const std::string& name, const InferenceEngine: auto pBlobDesc = MemoryDescUtils::interpretAsBlobDesc(graph->getInputNodeByName(name)->getChildEdgesAtPort(0)[0]->getMemory()); if (data->getTensorDesc() == pBlobDesc && graph->_normalizePreprocMap.find(name) == graph->_normalizePreprocMap.end() && !graph->getConfig().batchLimit) { - externalPtr[name] = data->buffer(); + externalPtr[name] = data; } else if (externalPtr.find(name) != externalPtr.end()) { externalPtr.erase(name); } @@ -480,7 +484,7 @@ void LegacyInferRequest::SetBlob(const std::string& name, const InferenceEngine: auto pBlobDesc = MemoryDescUtils::interpretAsBlobDesc(graph->getOutputNodeByName(name)->getParentEdgesAtPort(0)[0]->getMemory()); if (data->getTensorDesc() == pBlobDesc && !graph->getConfig().batchLimit) { - externalPtr[name] = data->buffer(); + externalPtr[name] = data; } else if (externalPtr.find(name) != externalPtr.end()) { externalPtr.erase(name); } @@ -525,7 +529,7 @@ InferenceEngine::Blob::Ptr LegacyInferRequest::GetBlob(const std::string& name) _inputs[name]->allocate(); if (pBlob->getTensorDesc() == desc && graph->_normalizePreprocMap.find(name) == graph->_normalizePreprocMap.end() && !graph->getConfig().batchLimit) { - externalPtr[name] = _inputs[name]->buffer(); + externalPtr[name] = _inputs[name]; } } data = _inputs[name]; @@ -587,7 +591,7 @@ InferenceEngine::Blob::Ptr LegacyInferRequest::GetBlob(const std::string& name) _outputs[name] = data; if (!externalPtr.count(name) && data->getTensorDesc() == pBlobDesc && !graph->getConfig().batchLimit) { - externalPtr[name] = data->buffer(); + externalPtr[name] = data; } } data = _outputs[name]; @@ -704,7 +708,7 @@ void 
InferRequest::SetBlob(const std::string& name, const InferenceEngine::Blob: } if (actualDesc->isCompatible(MemoryDescUtils::convertToCpuBlockedMemoryDesc(blobDesc)) && graph->_normalizePreprocMap.find(name) == graph->_normalizePreprocMap.end() && !graph->getConfig().batchLimit) { - externalPtr[name] = data->buffer(); + externalPtr[name] = data; } else if (externalPtr.find(name) != externalPtr.end()) { externalPtr.erase(name); } @@ -736,7 +740,7 @@ void InferRequest::SetBlob(const std::string& name, const InferenceEngine::Blob: const auto &desc = graph->getOutputNodeByName(name)->getParentEdgesAtPort(0)[0]->getMemory().getDesc(); if (!isDynamic && blobDesc == MemoryDescUtils::convertToTensorDesc(desc) && !graph->getConfig().batchLimit) { - externalPtr[name] = data->buffer(); + externalPtr[name] = data; } else if (externalPtr.find(name) != externalPtr.end()) { externalPtr.erase(name); } @@ -784,7 +788,7 @@ InferenceEngine::Blob::Ptr InferRequest::GetBlob(const std::string& name) { if (!isDynamic && desc == MemoryDescUtils::convertToTensorDesc(graph->getInputNodeByName(name)->getChildEdgesAtPort(0)[0]->getMemory().getDesc()) && graph->_normalizePreprocMap.find(name) == graph->_normalizePreprocMap.end() && !graph->getConfig().batchLimit) { - externalPtr[name] = _inputs[name]->buffer(); + externalPtr[name] = _inputs[name]; } } else { IE_THROW() << "Blob with name: " << name << " exists in CPU plugin graph, but absents in network inputs"; @@ -844,7 +848,7 @@ InferenceEngine::Blob::Ptr InferRequest::GetBlob(const std::string& name) { if (!isDynamic && !externalPtr.count(name) && data->getTensorDesc() == MemoryDescUtils::convertToTensorDesc(output->second->getParentEdgesAtPort(0)[0]->getMemory().getDesc()) && !graph->getConfig().batchLimit) { - externalPtr[name] = data->buffer(); + externalPtr[name] = data; } } else { IE_THROW() << "Blob with name: " << name << " exists in CPU plugin graph, but absents in network outputs"; diff --git a/src/plugins/intel_cpu/src/infer_request.h b/src/plugins/intel_cpu/src/infer_request.h index 06fb4a2b805eaf..b1e98f97152752 100644 --- a/src/plugins/intel_cpu/src/infer_request.h +++ b/src/plugins/intel_cpu/src/infer_request.h @@ -56,7 +56,7 @@ class InferRequestBase : public InferenceEngine::IInferRequestInternal { virtual void PushInputData() = 0; Graph* graph = nullptr; - std::unordered_map externalPtr; + std::unordered_map externalPtr; private: void PushStates(); From 21a5226b443fd2b80538bb424b4bfd65dcc5f40c Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Fri, 26 May 2023 19:01:56 +0200 Subject: [PATCH 10/28] Process inPlace edges in order --- src/plugins/intel_cpu/src/edge.h | 6 +-- src/plugins/intel_cpu/src/graph.cpp | 42 ++++++++++++---- src/plugins/intel_cpu/src/node.cpp | 50 +++++++++++-------- src/plugins/intel_cpu/src/node.h | 2 +- src/plugins/intel_cpu/src/nodes/concat.cpp | 15 +++--- src/plugins/intel_cpu/src/nodes/concat.h | 2 +- src/plugins/intel_cpu/src/nodes/gather.cpp | 12 ++--- src/plugins/intel_cpu/src/nodes/gather.h | 2 +- src/plugins/intel_cpu/src/nodes/split.cpp | 12 ++--- src/plugins/intel_cpu/src/nodes/split.h | 2 +- .../intel_cpu/src/partitioned_mem_mgr.cpp | 48 +++--------------- .../intel_cpu/src/partitioned_mem_mgr.h | 14 +++--- 12 files changed, 100 insertions(+), 107 deletions(-) diff --git a/src/plugins/intel_cpu/src/edge.h b/src/plugins/intel_cpu/src/edge.h index 8647ea3f779f3a..df74d5ee8496ae 100644 --- a/src/plugins/intel_cpu/src/edge.h +++ b/src/plugins/intel_cpu/src/edge.h @@ -43,11 +43,14 @@ class Edge { No = 2 }; + enum LOOK 
{ LOOK_UP = 1, LOOK_DOWN = 2, LOOK_BOTH = LOOK_UP | LOOK_DOWN }; + inline Status getStatus() const noexcept { return status; } void changeStatus(Status state); + bool inPlace(LOOK look = LOOK_BOTH) const; void init(); void allocate(const void* mem_ptr = nullptr); @@ -103,10 +106,7 @@ class Edge { void collectConsumers(std::vector>& result) const; - enum LOOK { LOOK_UP = 1, LOOK_DOWN = 2, LOOK_BOTH = LOOK_UP | LOOK_DOWN }; - EdgePtr getBaseEdge(int look = LOOK_BOTH); - bool inPlace(LOOK look = LOOK_BOTH) const; void allocateCommon(const std::function& allocate); friend class Graph; diff --git a/src/plugins/intel_cpu/src/graph.cpp b/src/plugins/intel_cpu/src/graph.cpp index bc3787336a7c7f..7742118005cb6b 100644 --- a/src/plugins/intel_cpu/src/graph.cpp +++ b/src/plugins/intel_cpu/src/graph.cpp @@ -765,9 +765,9 @@ static edge_clusters_t findEdgeClusters(const std::vector & graphEdges) void Graph::AllocateWithReuse() { edge_clusters_t edge_clusters = findEdgeClusters(graphEdges); - size_t edge_clusters_count = edge_clusters.size(); + size_t remaining_edge_clusters_count = edge_clusters.size(); - for (size_t i = 0; i < edge_clusters_count;) { + for (size_t i = 0; i < remaining_edge_clusters_count;) { auto &cluster = edge_clusters[i]; bool erase = false; for (auto &edge : cluster) { @@ -784,21 +784,19 @@ void Graph::AllocateWithReuse() { } if (erase) { - std::swap(edge_clusters[i], edge_clusters[edge_clusters_count - 1]); - --edge_clusters_count; + std::swap(edge_clusters[i], edge_clusters[remaining_edge_clusters_count - 1]); + --remaining_edge_clusters_count; } else { ++i; } } - edge_clusters.resize(edge_clusters_count); - const int64_t alignment = 32; // 32 bytes std::vector definedBoxes; std::vector undefinedBoxes; - for (size_t i = 0; i < edge_clusters.size(); i++) { - MemorySolver::Box box = {std::numeric_limits::max(), 0, 0, static_cast(i)}; + for (int i = 0; i < remaining_edge_clusters_count; i++) { + MemorySolver::Box box = { std::numeric_limits::max(), 0, 0, static_cast(i) }; int64_t boxSize = 0; for (auto &edge : edge_clusters[i]) { int e_start = edge->getParent()->execIndex; @@ -940,6 +938,32 @@ void Graph::AllocateWithReuse() { } } } + + // Resolve all other edges with status NotAllocated and in-place + for (auto& cluster : edge_clusters) { + for (auto& edge : cluster) { + if (edge->getStatus() == Edge::Status::NotAllocated) { + std::vector edges_to_process; + edges_to_process.push_back(edge); + for (auto next_edge = edge->getSharedEdge(std::nothrow); + next_edge; + next_edge = next_edge->getSharedEdge(std::nothrow)) { + edges_to_process.push_back(next_edge); + } + std::for_each(edges_to_process.rbegin(), edges_to_process.rend(), [](const EdgePtr& edge){ + if (edge->getStatus() == Edge::Status::NotAllocated) { + if (edge->inPlace(Edge::LOOK_DOWN)) { + edge->getChild()->resolveInPlaceEdges(Edge::LOOK_DOWN); + } else if (edge->inPlace(Edge::LOOK_UP)) { + edge->getParent()->resolveInPlaceEdges(Edge::LOOK_UP); + } else { + edge->getMemory(); + } + } + }); + } + } + } } void Graph::Allocate() { @@ -954,7 +978,7 @@ void Graph::Allocate() { AllocateWithReuse(); // Resolve all other edges with status NotAllocated and in-place - for (auto& node : graphNodes) node->resolveInPlaceEdges(); + //for (auto& node : graphNodes) node->resolveInPlaceEdges(); // Check all getters. Should work. 
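     // (With the in-place resolution block added to AllocateWithReuse above, every remaining edge is
     // expected to hold allocated memory by this point, so validate() should not throw.)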
for (auto& edge : graphEdges) edge->validate(); diff --git a/src/plugins/intel_cpu/src/node.cpp b/src/plugins/intel_cpu/src/node.cpp index ee806e8867e08f..279a89511e8775 100644 --- a/src/plugins/intel_cpu/src/node.cpp +++ b/src/plugins/intel_cpu/src/node.cpp @@ -365,37 +365,45 @@ bool Node::canBeInPlace() const { return true; } -void Node::resolveInPlaceEdges() { +void Node::resolveInPlaceEdges(Edge::LOOK look) { const NodeDesc *selected_pd = getSelectedPrimitiveDescriptor(); if (!selected_pd) IE_THROW() << "Cannot find selected primitive descriptor for node: " << getName(); - for (size_t i = 0; i < getParentEdges().size() && i < selected_pd->getConfig().inConfs.size(); i++) { - auto parentEdge = getParentEdgeAt(i); - auto inplaceOutIndx = selected_pd->getConfig().inConfs[i].inPlace(); + if (look & Edge::LOOK_DOWN) { + for (size_t i = 0; i < getParentEdges().size() && i < selected_pd->getConfig().inConfs.size(); i++) { + auto parentEdge = getParentEdgeAt(i); + auto inplaceOutIndx = selected_pd->getConfig().inConfs[i].inPlace(); - if (inplaceOutIndx < 0) //parentEdge->getStatus() != Edge::Status::NotAllocated || - continue; + if (inplaceOutIndx < 0) //parentEdge->getStatus() != Edge::Status::NotAllocated || + continue; - auto childEdge = getChildEdgesAtPort(inplaceOutIndx).front(); - auto memMgr = std::make_shared(childEdge); - parentEdge->getMemoryPtr().reset(new Memory(getEngine())); - parentEdge->getMemoryPtr()->Create(selected_pd->getConfig().inConfs[i].getMemDesc(), memMgr); + IE_ASSERT(parentEdge->getStatus() == Edge::Status::NotAllocated) << "Unexpected inplace resolve call to an allocated edge: " << parentEdge->name(); - parentEdge->changeStatus(Edge::Status::Allocated); + auto baseMemMngr = getChildEdgesAtPort(inplaceOutIndx)[0]->getMemory().getMemoryMngr(); + auto memMngr = std::make_shared(baseMemMngr); + parentEdge->getMemoryPtr().reset(new Memory(getEngine())); + parentEdge->getMemoryPtr()->Create(selected_pd->getConfig().inConfs[i].getMemDesc(), memMngr); + + parentEdge->changeStatus(Edge::Status::Allocated); + } } - for (size_t i = 0; i < getChildEdges().size() && i < selected_pd->getConfig().outConfs.size(); i++) { - auto childEdge = getChildEdgeAt(i); - auto inplaceInpIndx = selected_pd->getConfig().outConfs[i].inPlace(); + if (look & Edge::LOOK_UP) { + for (size_t i = 0; i < getChildEdges().size() && i < selected_pd->getConfig().outConfs.size(); i++) { + auto childEdge = getChildEdgeAt(i); + auto inplaceInpIndx = selected_pd->getConfig().outConfs[i].inPlace(); - if (inplaceInpIndx < 0) //childEdge->getStatus() != Edge::Status::NotAllocated || - continue; + if (inplaceInpIndx < 0) //childEdge->getStatus() != Edge::Status::NotAllocated || + continue; - auto parentEdge = getParentEdgesAtPort(inplaceInpIndx).front(); - auto memMgr = std::make_shared(parentEdge); - childEdge->getMemoryPtr().reset(new Memory(getEngine())); - childEdge->getMemoryPtr()->Create(selected_pd->getConfig().outConfs[i].getMemDesc(), memMgr); + IE_ASSERT(childEdge->getStatus() == Edge::Status::NotAllocated) << "Unexpected inplace resolve call to an allocated edge: " << childEdge->name(); - childEdge->changeStatus(Edge::Status::Allocated); + auto baseMemMngr = getParentEdgesAtPort(inplaceInpIndx).front()->getMemory().getMemoryMngr(); + auto memMngr = std::make_shared(baseMemMngr); + childEdge->getMemoryPtr().reset(new Memory(getEngine())); + childEdge->getMemoryPtr()->Create(selected_pd->getConfig().outConfs[i].getMemDesc(), memMngr); + + childEdge->changeStatus(Edge::Status::Allocated); + } } } diff 
--git a/src/plugins/intel_cpu/src/node.h b/src/plugins/intel_cpu/src/node.h index 625699a92be726..8a9c3c9b74f3f1 100644 --- a/src/plugins/intel_cpu/src/node.h +++ b/src/plugins/intel_cpu/src/node.h @@ -361,7 +361,7 @@ class Node { PerfCount &PerfCounter() { return perfCounter; } - virtual void resolveInPlaceEdges(); + virtual void resolveInPlaceEdges(Edge::LOOK look = Edge::LOOK_BOTH); virtual void execute(dnnl::stream strm) = 0; void updateShapes(); diff --git a/src/plugins/intel_cpu/src/nodes/concat.cpp b/src/plugins/intel_cpu/src/nodes/concat.cpp index 4b0bb67eace259..8436c2344ae302 100644 --- a/src/plugins/intel_cpu/src/nodes/concat.cpp +++ b/src/plugins/intel_cpu/src/nodes/concat.cpp @@ -625,29 +625,28 @@ void Concat::execRef() { } } -void Concat::resolveInPlaceEdges() { - if (isInPlace()) { +void Concat::resolveInPlaceEdges(Edge::LOOK look) { + if ((look & Edge::LOOK_DOWN) && isInPlace()) { auto selected_pd = getSelectedPrimitiveDescriptor(); if (selected_pd == nullptr) IE_THROW() << "Preferable primitive descriptor is not set."; auto& config = selected_pd->getConfig(); size_t numberOfInputs = config.inConfs.size(); size_t inplaceOutIndx = selected_pd->getConfig().inConfs[0].inPlace(); - auto childEdge = getChildEdgesAtPort(inplaceOutIndx).front(); + auto baseMemMngr = getChildEdgesAtPort(inplaceOutIndx).front()->getMemory().getMemoryMngr(); for (size_t i = 0; i < numberOfInputs; ++i) { auto parentEdge = getParentEdgeAt(i); - // IE_ASSERT(parentEdge->getStatus() == Edge::Status::NotAllocated) << "Unexpected edge status in node: " << - // getName() << " with type " << getTypeStr(); + IE_ASSERT(parentEdge->getStatus() == Edge::Status::NotAllocated) << "Unexpected inplace resolve call to an allocated edge: " << parentEdge->name(); - auto memMgr = std::make_shared(childEdge, numberOfInputs, i); + auto memMngr = std::make_shared(baseMemMngr, numberOfInputs, i); parentEdge->getMemoryPtr().reset(new Memory(getEngine())); - parentEdge->getMemoryPtr()->Create(selected_pd->getConfig().inConfs[i].getMemDesc(), memMgr); + parentEdge->getMemoryPtr()->Create(selected_pd->getConfig().inConfs[i].getMemDesc(), memMngr); parentEdge->changeStatus(Edge::Status::Allocated); } } else { - Node::resolveInPlaceEdges(); + Node::resolveInPlaceEdges(look); } } diff --git a/src/plugins/intel_cpu/src/nodes/concat.h b/src/plugins/intel_cpu/src/nodes/concat.h index f17a706fccd737..e9a4c9e764a7b3 100644 --- a/src/plugins/intel_cpu/src/nodes/concat.h +++ b/src/plugins/intel_cpu/src/nodes/concat.h @@ -26,7 +26,7 @@ class Concat : public Node { bool created() const override; void execute(dnnl::stream strm) override; void executeDynamicImpl(dnnl::stream strm) override { execute(strm); } - void resolveInPlaceEdges() override; + void resolveInPlaceEdges(Edge::LOOK look) override; InferenceEngine::Precision getRuntimePrecision() const override; diff --git a/src/plugins/intel_cpu/src/nodes/gather.cpp b/src/plugins/intel_cpu/src/nodes/gather.cpp index 1ac5392fc7985f..4ce9c15f3501ad 100644 --- a/src/plugins/intel_cpu/src/nodes/gather.cpp +++ b/src/plugins/intel_cpu/src/nodes/gather.cpp @@ -587,8 +587,8 @@ bool Gather::isExecutable() const { return !isInPlace() && Node::isExecutable(); } -void Gather::resolveInPlaceEdges() { - if (isInPlace()) { +void Gather::resolveInPlaceEdges(Edge::LOOK look) { + if ((look & Edge::LOOK_UP) && isInPlace()) { auto selected_pd = getSelectedPrimitiveDescriptor(); if (selected_pd == nullptr) IE_THROW() << "Preferable primitive descriptor is not set."; @@ -598,7 +598,7 @@ void 
Gather::resolveInPlaceEdges() { size_t inplaceInpIndx = selected_pd->getConfig().outConfs[outputPort].inPlace(); auto baseDim = inputShapes.front().getDims()[axis]; IE_ASSERT(baseDim != Shape::UNDEFINED_DIM) << "Gather node: " << getName() << " can not use inPlace memory with splitting on dynamic dimention"; - auto parentEdge = getParentEdgesAtPort(inplaceInpIndx).front(); + auto baseMemMngr = getParentEdgesAtPort(inplaceInpIndx).front()->getMemory().getMemoryMngr(); auto index = constIndices.at(0); ptrdiff_t offset = index < 0 ? baseDim + index : index; const auto& childEdges = getChildEdgesAtPort(outputPort); @@ -606,14 +606,14 @@ void Gather::resolveInPlaceEdges() { // IE_ASSERT(parentEdge->getStatus() == Edge::Status::NotAllocated) << "Unexpected edge status in node: " << // getName() << " with type " << getTypeStr(); - auto memMgr = std::make_shared(parentEdge, baseDim, offset); + auto memMngr = std::make_shared(baseMemMngr, baseDim, offset); childEdge->getMemoryPtr().reset(new Memory(getEngine())); - childEdge->getMemoryPtr()->Create(config.outConfs[outputPort].getMemDesc(), memMgr); + childEdge->getMemoryPtr()->Create(config.outConfs[outputPort].getMemDesc(), memMngr); childEdge->changeStatus(Edge::Status::Allocated); } } else { - Node::resolveInPlaceEdges(); + Node::resolveInPlaceEdges(look); } } diff --git a/src/plugins/intel_cpu/src/nodes/gather.h b/src/plugins/intel_cpu/src/nodes/gather.h index 3d323529fca402..f03a08832a66f5 100644 --- a/src/plugins/intel_cpu/src/nodes/gather.h +++ b/src/plugins/intel_cpu/src/nodes/gather.h @@ -25,7 +25,7 @@ class Gather : public Node { void execute(dnnl::stream strm) override; bool created() const override; bool isExecutable() const override; - void resolveInPlaceEdges() override; + void resolveInPlaceEdges(Edge::LOOK look) override; static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; diff --git a/src/plugins/intel_cpu/src/nodes/split.cpp b/src/plugins/intel_cpu/src/nodes/split.cpp index ab0068865efdc1..d23f8ebaba7359 100644 --- a/src/plugins/intel_cpu/src/nodes/split.cpp +++ b/src/plugins/intel_cpu/src/nodes/split.cpp @@ -522,8 +522,8 @@ void Split::SplitOptimizedExecutor::exec(const uint8_t* srcData, const std::vect }); } -void Split::resolveInPlaceEdges() { - if (isInPlace()) { +void Split::resolveInPlaceEdges(Edge::LOOK look) { + if ((look & Edge::LOOK_UP) && isInPlace()) { auto selected_pd = getSelectedPrimitiveDescriptor(); if (selected_pd == nullptr) IE_THROW() << "Preferable primitive descriptor is not set."; @@ -532,7 +532,7 @@ void Split::resolveInPlaceEdges() { size_t inplaceInpIndx = selected_pd->getConfig().outConfs[0].inPlace(); auto baseDim = inputShapes.front().getDims()[axis]; IE_ASSERT(baseDim != Shape::UNDEFINED_DIM) << "Split node: " << getName() << " can not use inPlace memory with splitting on dynamic dimention"; - auto parentEdge = getParentEdgesAtPort(inplaceInpIndx).front(); + auto baseMemMngr = getParentEdgesAtPort(inplaceInpIndx).front()->getMemory().getMemoryMngr(); ptrdiff_t offset = 0; for (size_t i = 0; i < numberOfOutputs; ++i) { auto partDim = outputShapes[i].getDims()[axis]; @@ -542,16 +542,16 @@ void Split::resolveInPlaceEdges() { // IE_ASSERT(parentEdge->getStatus() == Edge::Status::NotAllocated) << "Unexpected edge status in node: " << // getName() << " with type " << getTypeStr(); - auto memMgr = std::make_shared(parentEdge, baseDim, offset, partDim); + auto memMngr = std::make_shared(baseMemMngr, baseDim, offset, partDim); childEdge->getMemoryPtr().reset(new 
Memory(getEngine())); - childEdge->getMemoryPtr()->Create(selected_pd->getConfig().outConfs[i].getMemDesc(), memMgr); + childEdge->getMemoryPtr()->Create(selected_pd->getConfig().outConfs[i].getMemDesc(), memMngr); childEdge->changeStatus(Edge::Status::Allocated); } offset += partDim; } } else { - Node::resolveInPlaceEdges(); + Node::resolveInPlaceEdges(look); } } diff --git a/src/plugins/intel_cpu/src/nodes/split.h b/src/plugins/intel_cpu/src/nodes/split.h index 4d45878f4fd229..5402d748832d7d 100644 --- a/src/plugins/intel_cpu/src/nodes/split.h +++ b/src/plugins/intel_cpu/src/nodes/split.h @@ -31,7 +31,7 @@ class Split : public Node { bool needShapeInfer() const override; void prepareParams() override; void executeDynamicImpl(dnnl::stream strm) override { execute(strm); } - void resolveInPlaceEdges() override; + void resolveInPlaceEdges(Edge::LOOK look) override; private: struct SplitExecutor { diff --git a/src/plugins/intel_cpu/src/partitioned_mem_mgr.cpp b/src/plugins/intel_cpu/src/partitioned_mem_mgr.cpp index 75e4bd95b1822c..a712cbcd2749bd 100644 --- a/src/plugins/intel_cpu/src/partitioned_mem_mgr.cpp +++ b/src/plugins/intel_cpu/src/partitioned_mem_mgr.cpp @@ -6,64 +6,28 @@ using namespace ov::intel_cpu; -MemoryMngrPtr PartitionedMemoryMngr::sourceMemMngrNoThrow() const noexcept { - if (auto pEdge = m_wEdge.lock()) { - MemoryPtr pMem = nullptr; - try { - pMem = pEdge->getMemoryPtr(); - } - catch(...) { - return nullptr; - } - if (pMem) { - if (auto memMngr = pMem->getMemoryMngr()) { - return memMngr; - } - } - } - return nullptr; -} - -MemoryMngrPtr PartitionedMemoryMngr::sourceMemMngr() const { - auto memMngr = sourceMemMngrNoThrow(); - IE_ASSERT(memMngr != nullptr) << "PartitionedMemoryMngr references nullptr"; - return memMngr; -} - void* PartitionedMemoryMngr::getRawPtr() const noexcept { - if (auto memMngr = sourceMemMngrNoThrow()) { - return static_cast(memMngr->getRawPtr()) + m_offset_blocks * m_size / m_size_blocks; - } - return nullptr; + return static_cast(m_pMngr->getRawPtr()) + m_offset_blocks * m_size / m_size_blocks; } void PartitionedMemoryMngr::setExtBuff(void* ptr, size_t size) { - auto memMngr = sourceMemMngr(); - memMngr->setExtBuff(ptr, size); + m_pMngr->setExtBuff(ptr, size); } bool PartitionedMemoryMngr::resize(size_t size) { - auto memMngr = sourceMemMngr(); m_size = size; - return memMngr->resize(m_size * m_total_blocks / m_size_blocks); + return m_pMngr->resize(m_size * m_total_blocks / m_size_blocks); } bool PartitionedMemoryMngr::hasExtBuffer() const noexcept { - if (auto memMngr = sourceMemMngrNoThrow()) { - return memMngr->hasExtBuffer(); - } - return false; + return m_pMngr->hasExtBuffer(); } void PartitionedMemoryMngr::registerMemory(Memory* memPtr) { - auto memMngr = sourceMemMngr(); - memMngr->registerMemory(memPtr); + m_pMngr->registerMemory(memPtr); } void PartitionedMemoryMngr::unregisterMemory(Memory* memPtr) { - if (!m_wEdge.expired()) { - auto memMngr = sourceMemMngr(); - memMngr->unregisterMemory(memPtr); - } + m_pMngr->unregisterMemory(memPtr); } diff --git a/src/plugins/intel_cpu/src/partitioned_mem_mgr.h b/src/plugins/intel_cpu/src/partitioned_mem_mgr.h index bfdcb9dabccebd..94f5f9288d27f8 100644 --- a/src/plugins/intel_cpu/src/partitioned_mem_mgr.h +++ b/src/plugins/intel_cpu/src/partitioned_mem_mgr.h @@ -4,15 +4,17 @@ #pragma once -#include "edge.h" +#include "cpu_memory.h" namespace ov { namespace intel_cpu { class PartitionedMemoryMngr : public IMemoryMngrObserver { public: - PartitionedMemoryMngr(EdgePtr pEdge, size_t total_blocks = 1, 
ptrdiff_t offset_blocks = 0, size_t size_blocks = 1) - : m_wEdge(pEdge), m_total_blocks(total_blocks), m_offset_blocks(offset_blocks), m_size_blocks(size_blocks) {} + PartitionedMemoryMngr(MemoryMngrPtr pMngr, size_t total_blocks = 1, ptrdiff_t offset_blocks = 0, size_t size_blocks = 1) + : m_pMngr(pMngr), m_total_blocks(total_blocks), m_offset_blocks(offset_blocks), m_size_blocks(size_blocks) { + IE_ASSERT(m_pMngr) << "Memory manager is uninitialized"; + } void* getRawPtr() const noexcept override; void setExtBuff(void* ptr, size_t size) override; @@ -22,11 +24,7 @@ class PartitionedMemoryMngr : public IMemoryMngrObserver { void unregisterMemory(Memory* memPtr) override; private: - MemoryMngrPtr sourceMemMngr() const; - MemoryMngrPtr sourceMemMngrNoThrow() const noexcept; - -private: - EdgeWeakPtr m_wEdge; + MemoryMngrPtr m_pMngr; size_t m_total_blocks = 1; // size of the parent memory in blocks ptrdiff_t m_offset_blocks = 0; // offset from the base pointer in blocks size_t m_size_blocks = 1; // size of the partition in blocks From 4999dd34cc951af166f24ac1cf1ba5272e5eb869 Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Wed, 31 May 2023 17:33:10 +0200 Subject: [PATCH 11/28] Fixes --- src/plugins/intel_cpu/src/graph.cpp | 4 ++-- src/plugins/intel_cpu/src/node.cpp | 20 ++++++++++--------- .../intel_cpu/tests/unit/dnnl_memory_test.cpp | 2 +- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/src/plugins/intel_cpu/src/graph.cpp b/src/plugins/intel_cpu/src/graph.cpp index 7742118005cb6b..9b918189695f61 100644 --- a/src/plugins/intel_cpu/src/graph.cpp +++ b/src/plugins/intel_cpu/src/graph.cpp @@ -703,7 +703,7 @@ void Graph::InitEdges() { numberOfEdges = graphEdges.size(); //update the total number - for (auto i = 0; i < numberOfEdges; i++) { + for (ptrdiff_t i = 0; i < numberOfEdges; i++) { auto edge = graphEdges[i]; if (needReorder(edge)) { constexpr bool optimizedReorder = false; @@ -795,7 +795,7 @@ void Graph::AllocateWithReuse() { std::vector definedBoxes; std::vector undefinedBoxes; - for (int i = 0; i < remaining_edge_clusters_count; i++) { + for (size_t i = 0; i < remaining_edge_clusters_count; i++) { MemorySolver::Box box = { std::numeric_limits::max(), 0, 0, static_cast(i) }; int64_t boxSize = 0; for (auto &edge : edge_clusters[i]) { diff --git a/src/plugins/intel_cpu/src/node.cpp b/src/plugins/intel_cpu/src/node.cpp index 279a89511e8775..66b14e058450ed 100644 --- a/src/plugins/intel_cpu/src/node.cpp +++ b/src/plugins/intel_cpu/src/node.cpp @@ -371,13 +371,13 @@ void Node::resolveInPlaceEdges(Edge::LOOK look) { IE_THROW() << "Cannot find selected primitive descriptor for node: " << getName(); if (look & Edge::LOOK_DOWN) { for (size_t i = 0; i < getParentEdges().size() && i < selected_pd->getConfig().inConfs.size(); i++) { - auto parentEdge = getParentEdgeAt(i); auto inplaceOutIndx = selected_pd->getConfig().inConfs[i].inPlace(); if (inplaceOutIndx < 0) //parentEdge->getStatus() != Edge::Status::NotAllocated || continue; - IE_ASSERT(parentEdge->getStatus() == Edge::Status::NotAllocated) << "Unexpected inplace resolve call to an allocated edge: " << parentEdge->name(); + auto parentEdge = getParentEdgeAt(i); + IE_ASSERT(parentEdge->getStatus() == Edge::Status::NotAllocated) << " Unexpected inplace resolve call to an allocated edge: " << parentEdge->name(); auto baseMemMngr = getChildEdgesAtPort(inplaceOutIndx)[0]->getMemory().getMemoryMngr(); auto memMngr = std::make_shared(baseMemMngr); @@ -389,20 +389,22 @@ void Node::resolveInPlaceEdges(Edge::LOOK look) { } if (look & 
Edge::LOOK_UP) { for (size_t i = 0; i < getChildEdges().size() && i < selected_pd->getConfig().outConfs.size(); i++) { - auto childEdge = getChildEdgeAt(i); auto inplaceInpIndx = selected_pd->getConfig().outConfs[i].inPlace(); if (inplaceInpIndx < 0) //childEdge->getStatus() != Edge::Status::NotAllocated || continue; - IE_ASSERT(childEdge->getStatus() == Edge::Status::NotAllocated) << "Unexpected inplace resolve call to an allocated edge: " << childEdge->name(); - auto baseMemMngr = getParentEdgesAtPort(inplaceInpIndx).front()->getMemory().getMemoryMngr(); auto memMngr = std::make_shared(baseMemMngr); - childEdge->getMemoryPtr().reset(new Memory(getEngine())); - childEdge->getMemoryPtr()->Create(selected_pd->getConfig().outConfs[i].getMemDesc(), memMngr); - - childEdge->changeStatus(Edge::Status::Allocated); + const auto& childEdges = getChildEdgesAtPort(i); + + for (auto& childEdge : childEdges) { + IE_ASSERT(childEdge->getStatus() == Edge::Status::NotAllocated) << + " Unexpected inplace resolve call to an allocated edge: " << childEdge->name(); + childEdge->getMemoryPtr().reset(new Memory(getEngine())); + childEdge->getMemoryPtr()->Create(selected_pd->getConfig().outConfs[i].getMemDesc(), memMngr); + childEdge->changeStatus(Edge::Status::Allocated); + } } } } diff --git a/src/plugins/intel_cpu/tests/unit/dnnl_memory_test.cpp b/src/plugins/intel_cpu/tests/unit/dnnl_memory_test.cpp index 3775131aa55364..26ed9672a70ebf 100644 --- a/src/plugins/intel_cpu/tests/unit/dnnl_memory_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/dnnl_memory_test.cpp @@ -58,7 +58,7 @@ TEST(MemoryTest, ConcurrentResizeGetPrimitive) { Memory cpu_mem1(eng); cpu_mem1.Create(desc); Memory cpu_mem2(eng); - cpu_mem2.Create(desc, cpu_mem1.getDnnlMemoryMngr()); // tie two memory objects (memory reuse) + cpu_mem2.Create(desc, cpu_mem1.getMemoryMngr()); // tie two memory objects (memory reuse) auto desc2 = std::make_shared(Precision::FP32, Shape{10, 20}); std::atomic lock{true}; From d3c71e7a22acab8a305e4209ae726030680e51d7 Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Wed, 31 May 2023 19:14:54 +0200 Subject: [PATCH 12/28] Remove implicit initialization from Edge::getMemory --- src/plugins/intel_cpu/src/edge.cpp | 26 +++++++------------ src/plugins/intel_cpu/src/edge.h | 3 ++- src/plugins/intel_cpu/src/graph.cpp | 10 ++++++- src/plugins/intel_cpu/src/infer_request.cpp | 2 +- src/plugins/intel_cpu/src/node.cpp | 17 ++++++------ src/plugins/intel_cpu/src/nodes/bin_conv.cpp | 6 ++--- src/plugins/intel_cpu/src/nodes/bucketize.cpp | 6 ++--- src/plugins/intel_cpu/src/nodes/concat.cpp | 6 ++--- src/plugins/intel_cpu/src/nodes/def_conv.cpp | 10 +++---- .../intel_cpu/src/nodes/depth_to_space.cpp | 6 ++--- .../intel_cpu/src/nodes/depth_to_space.h | 2 +- .../intel_cpu/src/nodes/fake_quantize.cpp | 12 ++++----- src/plugins/intel_cpu/src/nodes/gather.cpp | 10 +++---- src/plugins/intel_cpu/src/nodes/gather_nd.cpp | 12 ++++----- src/plugins/intel_cpu/src/nodes/gather_nd.h | 6 ++--- .../intel_cpu/src/nodes/gather_tree.cpp | 2 +- src/plugins/intel_cpu/src/nodes/gather_tree.h | 2 +- .../intel_cpu/src/nodes/grid_sample.cpp | 6 ++--- src/plugins/intel_cpu/src/nodes/if.cpp | 2 +- src/plugins/intel_cpu/src/nodes/input.cpp | 4 +-- .../intel_cpu/src/nodes/interpolate.cpp | 18 ++++++------- src/plugins/intel_cpu/src/nodes/lrn.cpp | 4 +-- src/plugins/intel_cpu/src/nodes/matmul.cpp | 8 +++--- src/plugins/intel_cpu/src/nodes/mvn.cpp | 8 +++--- src/plugins/intel_cpu/src/nodes/normalize.cpp | 4 +-- src/plugins/intel_cpu/src/nodes/pad.cpp | 12 
++++----- src/plugins/intel_cpu/src/nodes/pad.h | 12 ++++----- src/plugins/intel_cpu/src/nodes/pooling.cpp | 4 +-- src/plugins/intel_cpu/src/nodes/priorbox.cpp | 2 +- .../src/nodes/priorbox_clustered.cpp | 2 +- src/plugins/intel_cpu/src/nodes/reduce.cpp | 12 ++++----- src/plugins/intel_cpu/src/nodes/reorder.cpp | 4 +-- src/plugins/intel_cpu/src/nodes/reshape.cpp | 4 +-- .../intel_cpu/src/nodes/reverse_sequence.cpp | 2 +- .../intel_cpu/src/nodes/reverse_sequence.h | 2 +- src/plugins/intel_cpu/src/nodes/roi_align.cpp | 4 +-- src/plugins/intel_cpu/src/nodes/roll.cpp | 2 +- src/plugins/intel_cpu/src/nodes/roll.h | 2 +- .../intel_cpu/src/nodes/scatter_update.cpp | 10 +++---- .../intel_cpu/src/nodes/shuffle_channels.cpp | 6 ++--- .../intel_cpu/src/nodes/space_to_depth.cpp | 4 +-- src/plugins/intel_cpu/src/nodes/split.cpp | 6 ++--- .../intel_cpu/src/nodes/tensoriterator.cpp | 4 +-- src/plugins/intel_cpu/src/nodes/topk.cpp | 12 ++++----- src/plugins/intel_cpu/src/nodes/transpose.cpp | 12 ++++----- src/plugins/intel_cpu/src/nodes/unique.cpp | 4 +-- .../src/concat_reshape_concat.cpp | 2 +- 47 files changed, 158 insertions(+), 158 deletions(-) diff --git a/src/plugins/intel_cpu/src/edge.cpp b/src/plugins/intel_cpu/src/edge.cpp index 57e3c25f1692fc..97a86b36bdfa6f 100644 --- a/src/plugins/intel_cpu/src/edge.cpp +++ b/src/plugins/intel_cpu/src/edge.cpp @@ -416,24 +416,16 @@ const Memory &Edge::getMemory() { return *getMemoryPtr(); } -MemoryPtr &Edge::getMemoryPtr() { +MemoryPtr Edge::getMemoryPtr() const { + return memoryPtr; +} + +void Edge::resetMemoryPtr(MemoryPtr mem) { if (status == Status::NotAllocated) { - memoryPtr.reset(new Memory(getParent()->getEngine())); - const auto &desc = getDesc(); - auto sharedEdge = getSharedEdge(); - auto sharedEdgeParent = sharedEdge->getParent(); - if (sharedEdgeParent->isConstant()) { - memoryPtr->Create(desc, sharedEdge->getMemoryPtr()->GetData()); - DEBUG_LOG(*this, " const sharedEdge with ", *sharedEdge); - } else { - memoryPtr->Create(desc, sharedEdge->getMemoryPtr()->getMemoryMngr()); - DEBUG_LOG(*this, " sharedEdge with ", *sharedEdge); - } memoryFromEdge.reset(); - changeStatus(Status::Allocated); } - - return memoryPtr; + memoryPtr = mem; + changeStatus(Status::Allocated); } void Edge::sharedMemFrom(const EdgePtr &edge) { @@ -445,11 +437,11 @@ void Edge::sharedMemFrom(const EdgePtr &edge) { void Edge::validate() { if (status == Status::Validated) return; - getMemory(); + getParent(); getChild(); - if (status != Status::Allocated) { + if (status != Status::Allocated || !memoryPtr) { IE_THROW() << "Error memory is not allocated!"; } status = Status::Validated; diff --git a/src/plugins/intel_cpu/src/edge.h b/src/plugins/intel_cpu/src/edge.h index df74d5ee8496ae..483f83f1a8cfbb 100644 --- a/src/plugins/intel_cpu/src/edge.h +++ b/src/plugins/intel_cpu/src/edge.h @@ -64,7 +64,8 @@ class Edge { const std::shared_ptr getChild() const; const Memory& getMemory(); - MemoryPtr& getMemoryPtr(); + MemoryPtr getMemoryPtr() const; + void resetMemoryPtr(MemoryPtr mem); ReorderStatus needReorder(); bool isDropped() const; diff --git a/src/plugins/intel_cpu/src/graph.cpp b/src/plugins/intel_cpu/src/graph.cpp index 9b918189695f61..c17f5fb81d30ee 100644 --- a/src/plugins/intel_cpu/src/graph.cpp +++ b/src/plugins/intel_cpu/src/graph.cpp @@ -957,7 +957,15 @@ void Graph::AllocateWithReuse() { } else if (edge->inPlace(Edge::LOOK_UP)) { edge->getParent()->resolveInPlaceEdges(Edge::LOOK_UP); } else { - edge->getMemory(); + auto sharedEdge = edge->getSharedEdge(); + auto 
sharedEdgeParent = sharedEdge->getParent(); + if (sharedEdgeParent->isConstant()) { + edge->allocate(sharedEdge->getMemoryPtr()->GetData()); + DEBUG_LOG(*edge, " const sharedEdge with ", *sharedEdge); + } else { + edge->allocate(sharedEdge->getMemoryPtr()->getMemoryMngr()); + DEBUG_LOG(*edge, " sharedEdge with ", *sharedEdge); + } } } }); diff --git a/src/plugins/intel_cpu/src/infer_request.cpp b/src/plugins/intel_cpu/src/infer_request.cpp index 2c4c8f6f53d323..a3dd6c78210c34 100644 --- a/src/plugins/intel_cpu/src/infer_request.cpp +++ b/src/plugins/intel_cpu/src/infer_request.cpp @@ -207,7 +207,7 @@ void InferRequestBase::changeDefaultPtr() { auto input = inputNodesMap.find(it.first); if (input != inputNodesMap.end()) { NodePtr inputNodePtr = input->second; - if (inputNodePtr->getChildEdgeAt(0)->getMemory().GetData() == it.second->buffer()) + if (inputNodePtr->getChildEdgeAt(0)->getMemory().GetData() == static_cast(it.second->buffer())) continue; auto& childEdges = inputNodePtr->getChildEdges(); // Input cannot be in-place with other primitives diff --git a/src/plugins/intel_cpu/src/node.cpp b/src/plugins/intel_cpu/src/node.cpp index 66b14e058450ed..4912954f689fc3 100644 --- a/src/plugins/intel_cpu/src/node.cpp +++ b/src/plugins/intel_cpu/src/node.cpp @@ -381,10 +381,9 @@ void Node::resolveInPlaceEdges(Edge::LOOK look) { auto baseMemMngr = getChildEdgesAtPort(inplaceOutIndx)[0]->getMemory().getMemoryMngr(); auto memMngr = std::make_shared(baseMemMngr); - parentEdge->getMemoryPtr().reset(new Memory(getEngine())); - parentEdge->getMemoryPtr()->Create(selected_pd->getConfig().inConfs[i].getMemDesc(), memMngr); - - parentEdge->changeStatus(Edge::Status::Allocated); + auto newMem = std::make_shared(getEngine()); + newMem->Create(selected_pd->getConfig().inConfs[i].getMemDesc(), memMngr); + parentEdge->resetMemoryPtr(newMem); } } if (look & Edge::LOOK_UP) { @@ -401,9 +400,9 @@ void Node::resolveInPlaceEdges(Edge::LOOK look) { for (auto& childEdge : childEdges) { IE_ASSERT(childEdge->getStatus() == Edge::Status::NotAllocated) << " Unexpected inplace resolve call to an allocated edge: " << childEdge->name(); - childEdge->getMemoryPtr().reset(new Memory(getEngine())); - childEdge->getMemoryPtr()->Create(selected_pd->getConfig().outConfs[i].getMemDesc(), memMngr); - childEdge->changeStatus(Edge::Status::Allocated); + auto newMem = std::make_shared(getEngine()); + newMem->Create(selected_pd->getConfig().outConfs[i].getMemDesc(), memMngr); + childEdge->resetMemoryPtr(newMem); } } } @@ -1684,7 +1683,7 @@ int Node::inPlaceInputPort(int portIdx) const { const auto& conf = selected_pd->getConfig(); - IE_ASSERT(portIdx >= 0 && portIdx < conf.inConfs.size()) << + IE_ASSERT(portIdx >= 0 && portIdx < static_cast(conf.inConfs.size())) << "Wrong portIndx: " << portIdx << " acceptable interval: [0, " << conf.inConfs.size() << ")"; return conf.inConfs[portIdx].inPlace(); @@ -1696,7 +1695,7 @@ int Node::inPlaceOutPort(int portIdx) const { const auto& conf = selected_pd->getConfig(); - IE_ASSERT(portIdx >= 0 && portIdx < conf.outConfs.size()) << + IE_ASSERT(portIdx >= 0 && portIdx < static_cast(conf.outConfs.size())) << "Wrong portIndx: " << portIdx << " acceptable interval: [0, " << conf.outConfs.size() << ")"; return conf.outConfs[portIdx].inPlace(); diff --git a/src/plugins/intel_cpu/src/nodes/bin_conv.cpp b/src/plugins/intel_cpu/src/nodes/bin_conv.cpp index 90b560977b7932..5965ed9ba3e78e 100644 --- a/src/plugins/intel_cpu/src/nodes/bin_conv.cpp +++ b/src/plugins/intel_cpu/src/nodes/bin_conv.cpp @@ -1298,9 
+1298,9 @@ void BinaryConvolution::executeReference(const uint8_t* src, const uint8_t* weig } void BinaryConvolution::execute(dnnl::stream strm) { - auto &srcMemory = getParentEdgeAt(0)->getMemoryPtr(); - auto &weightsMemory = getParentEdgeAt(1)->getMemoryPtr(); - auto &dstMemory = getChildEdgeAt(0)->getMemoryPtr(); + auto srcMemory = getParentEdgeAt(0)->getMemoryPtr(); + auto weightsMemory = getParentEdgeAt(1)->getMemoryPtr(); + auto dstMemory = getChildEdgeAt(0)->getMemoryPtr(); auto src = reinterpret_cast(srcMemory->GetPtr()); auto weights = reinterpret_cast(weightsMemory->GetPtr()); diff --git a/src/plugins/intel_cpu/src/nodes/bucketize.cpp b/src/plugins/intel_cpu/src/nodes/bucketize.cpp index 3098121f75049b..32627dbc77f131 100644 --- a/src/plugins/intel_cpu/src/nodes/bucketize.cpp +++ b/src/plugins/intel_cpu/src/nodes/bucketize.cpp @@ -177,9 +177,9 @@ void Bucketize::execute(dnnl::stream strm) { } void Bucketize::prepareParams() { - auto& inputTensorMemPtr = getParentEdgeAt(INPUT_TENSOR_PORT)->getMemoryPtr(); - auto& inputBinsMemPtr = getParentEdgeAt(INPUT_BINS_PORT)->getMemoryPtr(); - auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); + auto inputTensorMemPtr = getParentEdgeAt(INPUT_TENSOR_PORT)->getMemoryPtr(); + auto inputBinsMemPtr = getParentEdgeAt(INPUT_BINS_PORT)->getMemoryPtr(); + auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); if (!dstMemPtr || !dstMemPtr->isAllocated()) IE_THROW() << "Destination memory didn't allocate."; if (!inputTensorMemPtr || !inputTensorMemPtr->isAllocated()) diff --git a/src/plugins/intel_cpu/src/nodes/concat.cpp b/src/plugins/intel_cpu/src/nodes/concat.cpp index 8436c2344ae302..ad4bb56ba729d7 100644 --- a/src/plugins/intel_cpu/src/nodes/concat.cpp +++ b/src/plugins/intel_cpu/src/nodes/concat.cpp @@ -640,10 +640,10 @@ void Concat::resolveInPlaceEdges(Edge::LOOK look) { IE_ASSERT(parentEdge->getStatus() == Edge::Status::NotAllocated) << "Unexpected inplace resolve call to an allocated edge: " << parentEdge->name(); auto memMngr = std::make_shared(baseMemMngr, numberOfInputs, i); - parentEdge->getMemoryPtr().reset(new Memory(getEngine())); - parentEdge->getMemoryPtr()->Create(selected_pd->getConfig().inConfs[i].getMemDesc(), memMngr); + auto newMem = std::make_shared(getEngine()); + newMem->Create(selected_pd->getConfig().inConfs[i].getMemDesc(), memMngr); - parentEdge->changeStatus(Edge::Status::Allocated); + parentEdge->resetMemoryPtr(newMem); } } else { Node::resolveInPlaceEdges(look); diff --git a/src/plugins/intel_cpu/src/nodes/def_conv.cpp b/src/plugins/intel_cpu/src/nodes/def_conv.cpp index baed2bea09df7c..3d8be09f196dbf 100644 --- a/src/plugins/intel_cpu/src/nodes/def_conv.cpp +++ b/src/plugins/intel_cpu/src/nodes/def_conv.cpp @@ -1165,10 +1165,10 @@ void DeformableConvolution::DefConvRefExecutor::exec(const float* src, const flo } void DeformableConvolution::prepareParams() { - auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); - auto& srcMemPtr = getParentEdgeAt(DATA_ID)->getMemoryPtr(); - auto& offMemPtr = getParentEdgeAt(OFF_ID)->getMemoryPtr(); - auto& weiMemPtr = getParentEdgeAt(WEI_ID)->getMemoryPtr(); + auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); + auto srcMemPtr = getParentEdgeAt(DATA_ID)->getMemoryPtr(); + auto offMemPtr = getParentEdgeAt(OFF_ID)->getMemoryPtr(); + auto weiMemPtr = getParentEdgeAt(WEI_ID)->getMemoryPtr(); if (!dstMemPtr || !dstMemPtr->isAllocated()) IE_THROW() << errorPrefix << " did not allocate destination memory"; @@ -1180,7 +1180,7 @@ void DeformableConvolution::prepareParams() { IE_THROW() << 
errorPrefix << " did not allocate weights memory"; if (getOriginalInputsNumber() > 3) { - auto& modMemPtr = getParentEdgeAt(MOD_ID)->getMemoryPtr(); + auto modMemPtr = getParentEdgeAt(MOD_ID)->getMemoryPtr(); if (!modMemPtr || !modMemPtr->isAllocated()) IE_THROW() << errorPrefix << " did not allocate modulations memory"; } diff --git a/src/plugins/intel_cpu/src/nodes/depth_to_space.cpp b/src/plugins/intel_cpu/src/nodes/depth_to_space.cpp index 2c6927285f1642..4d18e8ae720f03 100644 --- a/src/plugins/intel_cpu/src/nodes/depth_to_space.cpp +++ b/src/plugins/intel_cpu/src/nodes/depth_to_space.cpp @@ -161,8 +161,8 @@ void DepthToSpace::initSupportedPrimitiveDescriptors() { } void DepthToSpace::createPrimitive() { - auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); - auto& srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); + auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); + auto srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); if (!dstMemPtr || !dstMemPtr->isAllocated()) THROW_ERROR << "has not allocated destination memory"; if (!srcMemPtr || !srcMemPtr->isAllocated()) @@ -288,7 +288,7 @@ DepthToSpace::DepthToSpaceExecutor::DepthToSpaceExecutor(const DepthToSpaceAttrs permuteKernel = std::unique_ptr(new PermuteKernel(params)); } -void DepthToSpace::DepthToSpaceExecutor::exec(MemoryPtr& srcMemPtr, MemoryPtr& dstMemPtr, const int MB) { +void DepthToSpace::DepthToSpaceExecutor::exec(const MemoryPtr& srcMemPtr, const MemoryPtr& dstMemPtr, const int MB) { if (!permuteKernel) IE_THROW() << "Could not execute. Kernel for Transpose node was not compiled."; diff --git a/src/plugins/intel_cpu/src/nodes/depth_to_space.h b/src/plugins/intel_cpu/src/nodes/depth_to_space.h index 4088379944cb08..6fce5829dc7c4e 100644 --- a/src/plugins/intel_cpu/src/nodes/depth_to_space.h +++ b/src/plugins/intel_cpu/src/nodes/depth_to_space.h @@ -46,7 +46,7 @@ class DepthToSpace : public Node { DepthToSpaceAttrs attrs; struct DepthToSpaceExecutor { DepthToSpaceExecutor(const DepthToSpaceAttrs& attrs); - void exec(MemoryPtr& srcMemPtr, MemoryPtr& dstMemPtr, const int MB); + void exec(const MemoryPtr& srcMemPtr, const MemoryPtr& dstMemPtr, const int MB); ~DepthToSpaceExecutor() = default; private: diff --git a/src/plugins/intel_cpu/src/nodes/fake_quantize.cpp b/src/plugins/intel_cpu/src/nodes/fake_quantize.cpp index b7b0cd1e4411a7..7e507174efce4f 100644 --- a/src/plugins/intel_cpu/src/nodes/fake_quantize.cpp +++ b/src/plugins/intel_cpu/src/nodes/fake_quantize.cpp @@ -1486,8 +1486,8 @@ void FakeQuantize::createPrimitive() { } void FakeQuantize::executeReference() { - auto &srcMemory = getParentEdgeAt(0)->getMemoryPtr(); - auto &dstMemory = getChildEdgeAt(0)->getMemoryPtr(); + auto srcMemory = getParentEdgeAt(0)->getMemoryPtr(); + auto dstMemory = getChildEdgeAt(0)->getMemoryPtr(); auto src = reinterpret_cast(srcMemory->GetPtr()); @@ -1596,8 +1596,8 @@ void FakeQuantize::executeReference() { } void FakeQuantize::executeBinarization(const std::unique_ptr &pKernel) const { #if defined(OPENVINO_ARCH_X86_64) - const auto &srcMemory = getParentEdgeAt(0)->getMemoryPtr(); - auto &dstMemory = getChildEdgeAt(0)->getMemoryPtr(); + auto srcMemory = getParentEdgeAt(0)->getMemoryPtr(); + auto dstMemory = getChildEdgeAt(0)->getMemoryPtr(); auto src = reinterpret_cast(srcMemory->GetPtr()); auto dst = reinterpret_cast(dstMemory->GetPtr()); @@ -1638,8 +1638,8 @@ void FakeQuantize::executeBinarization(const std::unique_ptr &pKernel) const { #if defined(OPENVINO_ARCH_X86_64) - auto &srcMemory = getParentEdgeAt(0)->getMemoryPtr(); - auto &dstMemory 
= getChildEdgeAt(0)->getMemoryPtr(); + auto srcMemory = getParentEdgeAt(0)->getMemoryPtr(); + auto dstMemory = getChildEdgeAt(0)->getMemoryPtr(); auto src = reinterpret_cast(srcMemory->GetPtr()); auto dst = reinterpret_cast(dstMemory->GetPtr()); diff --git a/src/plugins/intel_cpu/src/nodes/gather.cpp b/src/plugins/intel_cpu/src/nodes/gather.cpp index 4ce9c15f3501ad..21720918ab91c0 100644 --- a/src/plugins/intel_cpu/src/nodes/gather.cpp +++ b/src/plugins/intel_cpu/src/nodes/gather.cpp @@ -308,10 +308,10 @@ bool Gather::needPrepareParams() const { } void Gather::prepareParams() { - auto& dataMemPtr = getParentEdgeAt(GATHER_DATA)->getMemoryPtr(); + auto dataMemPtr = getParentEdgeAt(GATHER_DATA)->getMemoryPtr(); if (!dataMemPtr || !dataMemPtr->isAllocated()) THROW_ERROR << " has not allocated input data memory."; - auto& idxMemPtr = getParentEdgeAt(GATHER_INDICES)->getMemoryPtr(); + auto idxMemPtr = getParentEdgeAt(GATHER_INDICES)->getMemoryPtr(); if (!idxMemPtr || !idxMemPtr->isAllocated()) THROW_ERROR << " has not allocated input indices memory."; if (getSelectedPrimitiveDescriptor() == nullptr) @@ -607,10 +607,10 @@ void Gather::resolveInPlaceEdges(Edge::LOOK look) { // getName() << " with type " << getTypeStr(); auto memMngr = std::make_shared(baseMemMngr, baseDim, offset); - childEdge->getMemoryPtr().reset(new Memory(getEngine())); - childEdge->getMemoryPtr()->Create(config.outConfs[outputPort].getMemDesc(), memMngr); + auto newMem = std::make_shared(getEngine()); + newMem->Create(config.outConfs[outputPort].getMemDesc(), memMngr); - childEdge->changeStatus(Edge::Status::Allocated); + childEdge->resetMemoryPtr(newMem); } } else { Node::resolveInPlaceEdges(look); diff --git a/src/plugins/intel_cpu/src/nodes/gather_nd.cpp b/src/plugins/intel_cpu/src/nodes/gather_nd.cpp index 5310bb3030870a..1d827d4f8d7f1d 100644 --- a/src/plugins/intel_cpu/src/nodes/gather_nd.cpp +++ b/src/plugins/intel_cpu/src/nodes/gather_nd.cpp @@ -84,9 +84,9 @@ void GatherND::initSupportedPrimitiveDescriptors() { } void GatherND::prepareParams() { - auto& srcMemPtr = getParentEdgeAt(GATHERND_DATA)->getMemoryPtr(); - auto& idxMemPtr = getParentEdgeAt(GATHERND_INDEXES)->getMemoryPtr(); - auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); + auto srcMemPtr = getParentEdgeAt(GATHERND_DATA)->getMemoryPtr(); + auto idxMemPtr = getParentEdgeAt(GATHERND_INDEXES)->getMemoryPtr(); + auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); if (!srcMemPtr || !srcMemPtr->isAllocated()) THROW_ERROR << " has not allocated input memory of 'data'."; if (!idxMemPtr || !idxMemPtr->isAllocated()) @@ -136,7 +136,7 @@ void GatherND::execute(dnnl::stream strm) { getChildEdgeAt(0)->getMemoryPtr()); } -void GatherND::GatherNDExecutor::exec(const MemoryPtr& srcMemPtr, const MemoryPtr& idxMemPtr, MemoryPtr& dstMemPtr) { +void GatherND::GatherNDExecutor::exec(const MemoryPtr& srcMemPtr, const MemoryPtr& idxMemPtr, const MemoryPtr& dstMemPtr) { if (dataLength > 1) { gatherBlocks(srcMemPtr, idxMemPtr, dstMemPtr); return; @@ -149,7 +149,7 @@ void GatherND::GatherNDExecutor::exec(const MemoryPtr& srcMemPtr, const MemoryPt OV_CASE(sizeof(PrecisionTrait::value_type), PrecisionTrait::value_type)); } -void GatherND::GatherNDExecutor::gatherBlocks(const MemoryPtr& srcMemPtr, const MemoryPtr& idxMemPtr, MemoryPtr& dstMemPtr) { +void GatherND::GatherNDExecutor::gatherBlocks(const MemoryPtr& srcMemPtr, const MemoryPtr& idxMemPtr, const MemoryPtr& dstMemPtr) { const uint8_t* srcData = reinterpret_cast(srcMemPtr->GetPtr()); const int32_t* indices = 
reinterpret_cast(idxMemPtr->GetPtr()); uint8_t* dstData = reinterpret_cast(dstMemPtr->GetPtr()); @@ -186,7 +186,7 @@ void GatherND::GatherNDExecutor::gatherBlocks(const MemoryPtr& srcMemPtr, const } template -void GatherND::GatherNDExecutor::gatherElementwise(const MemoryPtr& srcMemPtr, const MemoryPtr& idxMemPtr, MemoryPtr& dstMemPtr) { +void GatherND::GatherNDExecutor::gatherElementwise(const MemoryPtr& srcMemPtr, const MemoryPtr& idxMemPtr, const MemoryPtr& dstMemPtr) { const dataType* srcData = reinterpret_cast(srcMemPtr->GetPtr()); const int32_t* indices = reinterpret_cast(idxMemPtr->GetPtr()); dataType* dstData = reinterpret_cast(dstMemPtr->GetPtr()); diff --git a/src/plugins/intel_cpu/src/nodes/gather_nd.h b/src/plugins/intel_cpu/src/nodes/gather_nd.h index d74aed1503fdc4..0fec5e23337354 100644 --- a/src/plugins/intel_cpu/src/nodes/gather_nd.h +++ b/src/plugins/intel_cpu/src/nodes/gather_nd.h @@ -43,12 +43,12 @@ class GatherND : public Node { struct GatherNDExecutor { GatherNDExecutor(const GatherNDAttributes& attrs); ~GatherNDExecutor() = default; - void exec(const MemoryPtr& srcMemPtr, const MemoryPtr& idxMemPtr, MemoryPtr& dstMemPtr); + void exec(const MemoryPtr& srcMemPtr, const MemoryPtr& idxMemPtr, const MemoryPtr& dstMemPtr); private: template - void gatherElementwise(const MemoryPtr& srcMemPtr, const MemoryPtr& idxMemPtr, MemoryPtr& dstMemPtr); - void gatherBlocks(const MemoryPtr& srcMemPtr, const MemoryPtr& idxMemPtr, MemoryPtr& dstMemPtr); + void gatherElementwise(const MemoryPtr& srcMemPtr, const MemoryPtr& idxMemPtr, const MemoryPtr& dstMemPtr); + void gatherBlocks(const MemoryPtr& srcMemPtr, const MemoryPtr& idxMemPtr, const MemoryPtr& dstMemPtr); size_t batchSize = 1lu; size_t cycles = 1lu; diff --git a/src/plugins/intel_cpu/src/nodes/gather_tree.cpp b/src/plugins/intel_cpu/src/nodes/gather_tree.cpp index e7805dfe779d16..16cec71e2043fb 100644 --- a/src/plugins/intel_cpu/src/nodes/gather_tree.cpp +++ b/src/plugins/intel_cpu/src/nodes/gather_tree.cpp @@ -140,7 +140,7 @@ GatherTree::GatherTreeExecutor::GatherTreeExecutor(const VectorDims& stepIdxDims template void GatherTree::GatherTreeExecutor::exec(const MemoryPtr& stepIdxMemPtr, const MemoryPtr& parentIdxMemPtr, - const MemoryPtr& maxSeqLenMemPtr, const MemoryPtr& endTokenMemPtr, MemoryPtr& dstMemPtr) { + const MemoryPtr& maxSeqLenMemPtr, const MemoryPtr& endTokenMemPtr, const MemoryPtr& dstMemPtr) { const auto *stepIdx = reinterpret_cast(stepIdxMemPtr->GetPtr()); const auto *parentIdx = reinterpret_cast(parentIdxMemPtr->GetPtr()); const auto *maxSeqLen = reinterpret_cast(maxSeqLenMemPtr->GetPtr()); diff --git a/src/plugins/intel_cpu/src/nodes/gather_tree.h b/src/plugins/intel_cpu/src/nodes/gather_tree.h index e0f8bc38997928..d67dc088f9242e 100644 --- a/src/plugins/intel_cpu/src/nodes/gather_tree.h +++ b/src/plugins/intel_cpu/src/nodes/gather_tree.h @@ -38,7 +38,7 @@ class GatherTree : public Node { const MemoryPtr& parentIdxMemPtr, const MemoryPtr& maxSeqLenMemPtr, const MemoryPtr& endTokenMemPtr, - MemoryPtr& dstMemPtr); + const MemoryPtr& dstMemPtr); private: const int32_t maxTime; diff --git a/src/plugins/intel_cpu/src/nodes/grid_sample.cpp b/src/plugins/intel_cpu/src/nodes/grid_sample.cpp index 638512653e3b0e..f60dd82e1c811b 100644 --- a/src/plugins/intel_cpu/src/nodes/grid_sample.cpp +++ b/src/plugins/intel_cpu/src/nodes/grid_sample.cpp @@ -179,13 +179,13 @@ void GridSample::createPrimitive() { } void GridSample::prepareParams() { - auto& dataMemPtr = getParentEdgeAt(IN_DATA)->getMemoryPtr(); + auto dataMemPtr = 
getParentEdgeAt(IN_DATA)->getMemoryPtr(); if (!dataMemPtr || !dataMemPtr->isAllocated()) THROW_ERROR << " has not allocated input data memory."; - auto& gridMemPtr = getParentEdgeAt(IN_GRID)->getMemoryPtr(); + auto gridMemPtr = getParentEdgeAt(IN_GRID)->getMemoryPtr(); if (!gridMemPtr || !gridMemPtr->isAllocated()) THROW_ERROR << " has not allocated input grid memory."; - auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); + auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); if (!dstMemPtr || !dstMemPtr->isAllocated()) THROW_ERROR << " has not allocated output memory."; if (getSelectedPrimitiveDescriptor() == nullptr) diff --git a/src/plugins/intel_cpu/src/nodes/if.cpp b/src/plugins/intel_cpu/src/nodes/if.cpp index e4b28cf4b08326..a2388fb5772f6d 100644 --- a/src/plugins/intel_cpu/src/nodes/if.cpp +++ b/src/plugins/intel_cpu/src/nodes/if.cpp @@ -190,7 +190,7 @@ void If::prepareBeforeMappers(const bool isThen, const dnnl::engine& eng) { auto &inputMems = isThen ? inputMemThen : inputMemElse; auto &beforeMappers = isThen ? beforeThenMappers : beforeElseMappers; for (auto& map_rule : inputPortMap) { - auto &fromMem = getParentEdgesAtPort(map_rule.from)[0]->getMemoryPtr(); + auto fromMem = getParentEdgesAtPort(map_rule.from)[0]->getMemoryPtr(); auto &toMems = inputMems[map_rule.to]; beforeMappers.emplace_back(std::make_shared(fromMem, toMems, eng)); diff --git a/src/plugins/intel_cpu/src/nodes/input.cpp b/src/plugins/intel_cpu/src/nodes/input.cpp index 86f866ca49f36e..3ad3275f450c38 100644 --- a/src/plugins/intel_cpu/src/nodes/input.cpp +++ b/src/plugins/intel_cpu/src/nodes/input.cpp @@ -434,13 +434,13 @@ void Input::initSupportedPrimitiveDescriptors() { void Input::createPrimitive() { for (size_t i = 0; i < getChildEdges().size(); i++) { - auto &dstMemPtr = getChildEdgeAt(i)->getMemoryPtr(); + auto dstMemPtr = getChildEdgeAt(i)->getMemoryPtr(); if (!dstMemPtr || !dstMemPtr->isAllocated()) IE_THROW() << "Destination memory didn't allocate for node " << getName() << " to node " << getChildEdgeAt(i)->getChild()->getName() << "."; } for (size_t i = 0; i < getParentEdges().size(); i++) { - auto &srcMemPtr = getParentEdgeAt(i)->getMemoryPtr(); + auto srcMemPtr = getParentEdgeAt(i)->getMemoryPtr(); if (!srcMemPtr || !srcMemPtr->isAllocated()) IE_THROW() << "Destination memory didn't allocate for node " << getName() << " from node " << getParentEdgeAt(i)->getParent()->getName() << "."; diff --git a/src/plugins/intel_cpu/src/nodes/interpolate.cpp b/src/plugins/intel_cpu/src/nodes/interpolate.cpp index 195066dd0612a8..95d75a3010df83 100644 --- a/src/plugins/intel_cpu/src/nodes/interpolate.cpp +++ b/src/plugins/intel_cpu/src/nodes/interpolate.cpp @@ -2238,26 +2238,26 @@ void Interpolate::prepareParams() { IE_THROW() << "Can't prepare params for Interpolate node with name: " << getName() << ", because input/output dims aren't defined"; } - auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); + auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); if (!dstMemPtr || !dstMemPtr->isAllocated()) IE_THROW() << errorPrefix << " did not allocate destination memory"; - auto& srcMemPtr = getParentEdgeAt(DATA_ID)->getMemoryPtr(); + auto srcMemPtr = getParentEdgeAt(DATA_ID)->getMemoryPtr(); if (!srcMemPtr || !srcMemPtr->isAllocated()) IE_THROW() << errorPrefix << " did not allocate input memory"; if (shapeCalcMode == InterpolateShapeCalcMode::sizes) { - auto& tsMemPtr = getParentEdgeAt(TARGET_SHAPE_ID)->getMemoryPtr(); + auto tsMemPtr = getParentEdgeAt(TARGET_SHAPE_ID)->getMemoryPtr(); if (!tsMemPtr || 
!tsMemPtr->isAllocated()) IE_THROW() << errorPrefix << " did not allocate target shape memory"; } else { - auto& scaleMemPtr = getParentEdgeAt(get_scale_id())->getMemoryPtr(); + auto scaleMemPtr = getParentEdgeAt(get_scale_id())->getMemoryPtr(); if (!scaleMemPtr || !scaleMemPtr->isAllocated()) IE_THROW() << errorPrefix << " did not allocate scales memory"; } if (isAxesSpecified) { - auto &axesMemPtr = getParentEdgeAt(get_axis_id())->getMemoryPtr(); + auto axesMemPtr = getParentEdgeAt(get_axis_id())->getMemoryPtr(); if (!axesMemPtr || !axesMemPtr->isAllocated()) IE_THROW() << errorPrefix << " did not allocate axes memory"; } @@ -2353,8 +2353,8 @@ void Interpolate::prepareParams() { } void Interpolate::createPrimitive() { - auto& srcMemPtr = getParentEdgeAt(DATA_ID)->getMemoryPtr(); - auto& dstMemPtr = getChildEdgesAtPort(0)[0]->getMemoryPtr(); + auto srcMemPtr = getParentEdgeAt(DATA_ID)->getMemoryPtr(); + auto dstMemPtr = getChildEdgesAtPort(0)[0]->getMemoryPtr(); if (!srcMemPtr || !srcMemPtr->isAllocated()) IE_THROW() << errorPrefix << " did not allocate input memory"; if (!dstMemPtr || !dstMemPtr->isAllocated()) @@ -2443,8 +2443,8 @@ std::vector Interpolate::getScales(const VectorDims &srcDimPad, const Vec } void Interpolate::execute(dnnl::stream strm) { - auto &dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); - auto &srcMemPtr = getParentEdgeAt(DATA_ID)->getMemoryPtr(); + auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); + auto srcMemPtr = getParentEdgeAt(DATA_ID)->getMemoryPtr(); if (execPtr) { uint8_t *dst_data = reinterpret_cast(dstMemPtr->GetPtr()); diff --git a/src/plugins/intel_cpu/src/nodes/lrn.cpp b/src/plugins/intel_cpu/src/nodes/lrn.cpp index ba5ca8366b2b2e..eb06ab8ebcb189 100644 --- a/src/plugins/intel_cpu/src/nodes/lrn.cpp +++ b/src/plugins/intel_cpu/src/nodes/lrn.cpp @@ -163,8 +163,8 @@ std::shared_ptr Lrn::getSrcMemDesc(dnnl::primitive_desc_iterator &pr } void Lrn::prepareParams() { - auto& srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); - auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); + auto srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); + auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); if (!srcMemPtr || !srcMemPtr->isAllocated()) IE_THROW() << errorPrefix << " input memory did not allocate"; if (!dstMemPtr || !dstMemPtr->isAllocated()) diff --git a/src/plugins/intel_cpu/src/nodes/matmul.cpp b/src/plugins/intel_cpu/src/nodes/matmul.cpp index 7d12ecd4ec414e..fd5440daf8e0c9 100644 --- a/src/plugins/intel_cpu/src/nodes/matmul.cpp +++ b/src/plugins/intel_cpu/src/nodes/matmul.cpp @@ -557,9 +557,9 @@ InferenceEngine::Precision MatMul::getRuntimePrecision() const { } void MatMul::prepareParams() { - auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); - auto& src0MemPtr = getParentEdgeAt(0)->getMemoryPtr(); - auto& src1MemPtr = getParentEdgeAt(1)->getMemoryPtr(); + auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); + auto src0MemPtr = getParentEdgeAt(0)->getMemoryPtr(); + auto src1MemPtr = getParentEdgeAt(1)->getMemoryPtr(); if (!dstMemPtr || !dstMemPtr->isAllocated()) IE_THROW() << errorPrefix << " did not allocate destination memory"; if (!src0MemPtr || !src0MemPtr->isAllocated() || !src1MemPtr || !src1MemPtr->isAllocated()) @@ -597,7 +597,7 @@ void MatMul::prepareParams() { DnnlMemoryDescPtr dnnlBiasMemDesc = nullptr; if (withBiases) { - auto& biasMemory = getParentEdgeAt(2)->getMemoryPtr(); + auto biasMemory = getParentEdgeAt(2)->getMemoryPtr(); if (!biasMemory || !biasMemory->isAllocated()) IE_THROW() << errorPrefix << " did not allocate bias memory"; 
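The change repeated across these node hunks is mechanical: Edge::getMemoryPtr() now returns the MemoryPtr (a std::shared_ptr) by value instead of handing out a mutable reference into the edge, so call sites keep their own copy of the pointer rather than binding a reference to the edge's internal field. A minimal sketch of the resulting call-site shape; the node name and error messages are placeholders, not taken from the patch:

void SomeNode::prepareParams() {
    // By-value copies: the shared_ptr keeps the memory object alive for the
    // duration of the call even if the edge memory is later re-resolved.
    auto srcMemPtr = getParentEdgeAt(0)->getMemoryPtr();
    auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr();
    if (!srcMemPtr || !srcMemPtr->isAllocated())
        IE_THROW() << "Input memory is not allocated.";
    if (!dstMemPtr || !dstMemPtr->isAllocated())
        IE_THROW() << "Output memory is not allocated.";
    // Descriptor and data queries go through the local copies from here on,
    // e.g. srcMemPtr->getDesc(), dstMemPtr->GetPtr().
}
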
dnnlBiasMemDesc = biasMemory->GetDescWithType(); diff --git a/src/plugins/intel_cpu/src/nodes/mvn.cpp b/src/plugins/intel_cpu/src/nodes/mvn.cpp index ac479a0042f1f0..0be28a12fb2fa9 100644 --- a/src/plugins/intel_cpu/src/nodes/mvn.cpp +++ b/src/plugins/intel_cpu/src/nodes/mvn.cpp @@ -1345,8 +1345,8 @@ void MVN::MVNRefExecutor::exec(const uint8_t *src_data, uint8_t *dst_data, const } void MVN::prepareParams() { - auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); - auto& srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); + auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); + auto srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); if (!dstMemPtr || !dstMemPtr->isAllocated()) IE_THROW() << "Destination memory didn't allocate."; if (!srcMemPtr || !srcMemPtr->isAllocated()) @@ -1458,8 +1458,8 @@ void MVN::executeDynamicImpl(dnnl::stream strm) { } void MVN::execute(dnnl::stream strm) { - auto &dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); - auto &srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); + auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); + auto srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); if (execPtr) { uint8_t *dst_data = reinterpret_cast(dstMemPtr->GetPtr()); diff --git a/src/plugins/intel_cpu/src/nodes/normalize.cpp b/src/plugins/intel_cpu/src/nodes/normalize.cpp index f8025c90d238af..4989cae71a182b 100644 --- a/src/plugins/intel_cpu/src/nodes/normalize.cpp +++ b/src/plugins/intel_cpu/src/nodes/normalize.cpp @@ -874,8 +874,8 @@ void NormalizeL2::setPostOps(dnnl::primitive_attr& kernel_attrs, const VectorDim } void NormalizeL2::createPrimitive() { - auto& dstMemPtr = getChildEdgeAt(DATA)->getMemoryPtr(); - auto& srcMemPtr = getParentEdgeAt(DATA)->getMemoryPtr(); + auto dstMemPtr = getChildEdgeAt(DATA)->getMemoryPtr(); + auto srcMemPtr = getParentEdgeAt(DATA)->getMemoryPtr(); if (!dstMemPtr || !dstMemPtr->isAllocated()) THROW_ERROR << "can't get destination memory"; if (!srcMemPtr || !srcMemPtr->isAllocated()) diff --git a/src/plugins/intel_cpu/src/nodes/pad.cpp b/src/plugins/intel_cpu/src/nodes/pad.cpp index 315c2cb9810b3f..8baca7c5636500 100644 --- a/src/plugins/intel_cpu/src/nodes/pad.cpp +++ b/src/plugins/intel_cpu/src/nodes/pad.cpp @@ -368,7 +368,7 @@ void Pad::PadExecutor::innerParamsInitialization() { std::min(params.attrs.padsEnd[params.nDimsForWork], 0)) * params.shift; } -void Pad::PadExecutor::exec(MemoryPtr& srcMemPtr, MemoryPtr& dstMemPtr) { +void Pad::PadExecutor::exec(const MemoryPtr& srcMemPtr, const MemoryPtr& dstMemPtr) { if (zeroInputDimsCase) { padConstant(srcMemPtr, dstMemPtr); } else { @@ -418,7 +418,7 @@ static inline void parallel_step(size_t nDims, const VectorDims& dims, std::vect } } -void Pad::PadExecutor::padConstant(MemoryPtr& srcMemPtr, MemoryPtr& dstMemPtr) { +void Pad::PadExecutor::padConstant(const MemoryPtr& srcMemPtr, const MemoryPtr& dstMemPtr) { if (params.attrs.padValue == 0 && !zeroInputDimsCase) { padConstantZero(srcMemPtr, dstMemPtr); return; @@ -437,7 +437,7 @@ void Pad::PadExecutor::padConstant(MemoryPtr& srcMemPtr, MemoryPtr& dstMemPtr) { } template -void Pad::PadExecutor::padConstantCommon(MemoryPtr& srcMemPtr, MemoryPtr& dstMemPtr) { +void Pad::PadExecutor::padConstantCommon(const MemoryPtr& srcMemPtr, const MemoryPtr& dstMemPtr) { T* dstData = reinterpret_cast(dstMemPtr->GetPtr()); const T value = static_cast(params.attrs.padValue); if (zeroInputDimsCase) { @@ -486,7 +486,7 @@ void Pad::PadExecutor::padConstantCommon(MemoryPtr& srcMemPtr, MemoryPtr& dstMem }); } -void Pad::PadExecutor::padConstantZero(MemoryPtr& srcMemPtr, MemoryPtr& 
dstMemPtr) { +void Pad::PadExecutor::padConstantZero(const MemoryPtr& srcMemPtr, const MemoryPtr& dstMemPtr) { const uint8_t* srcData = reinterpret_cast(srcMemPtr->GetPtr()); uint8_t* dstData = reinterpret_cast(dstMemPtr->GetPtr()); @@ -527,7 +527,7 @@ void Pad::PadExecutor::padConstantZero(MemoryPtr& srcMemPtr, MemoryPtr& dstMemPt }); } -void Pad::PadExecutor::padEdge(MemoryPtr& srcMemPtr, MemoryPtr& dstMemPtr) { +void Pad::PadExecutor::padEdge(const MemoryPtr& srcMemPtr, const MemoryPtr& dstMemPtr) { const uint8_t* srcData = reinterpret_cast(srcMemPtr->GetPtr()); uint8_t* dstData = reinterpret_cast(dstMemPtr->GetPtr()); @@ -568,7 +568,7 @@ void Pad::PadExecutor::padEdge(MemoryPtr& srcMemPtr, MemoryPtr& dstMemPtr) { }); } -void Pad::PadExecutor::padReflectOrSymmetric(MemoryPtr& srcMemPtr, MemoryPtr& dstMemPtr, const bool isSymmetric) { +void Pad::PadExecutor::padReflectOrSymmetric(const MemoryPtr& srcMemPtr, const MemoryPtr& dstMemPtr, const bool isSymmetric) { const uint8_t* srcData = reinterpret_cast(srcMemPtr->GetPtr()); uint8_t* dstData = reinterpret_cast(dstMemPtr->GetPtr()); const size_t shift = isSymmetric ? 1 : 0; diff --git a/src/plugins/intel_cpu/src/nodes/pad.h b/src/plugins/intel_cpu/src/nodes/pad.h index d19fa9c1f30d3d..5f670643d19499 100644 --- a/src/plugins/intel_cpu/src/nodes/pad.h +++ b/src/plugins/intel_cpu/src/nodes/pad.h @@ -57,15 +57,15 @@ class Pad : public Node { const std::vector& srcMemory, const std::vector& dstMemory, const std::string& errorPrefix); - void exec(MemoryPtr& srcMemPtr, MemoryPtr& dstMemPtr); + void exec(const MemoryPtr& srcMemPtr, const MemoryPtr& dstMemPtr); ~PadExecutor() = default; private: - void padConstant(MemoryPtr& srcMemPtr, MemoryPtr& dstMemPtr); - template void padConstantCommon(MemoryPtr& srcMemPtr, MemoryPtr& dstMemPtr); - void padConstantZero(MemoryPtr& srcMemPtr, MemoryPtr& dstMemPtr); - void padEdge(MemoryPtr& srcMemPtr, MemoryPtr& dstMemPtr); - void padReflectOrSymmetric(MemoryPtr& srcMemPtr, MemoryPtr& dstMemPtr, const bool isSymmetric = false); + void padConstant(const MemoryPtr& srcMemPtr, const MemoryPtr& dstMemPtr); + template void padConstantCommon(const MemoryPtr& srcMemPtr, const MemoryPtr& dstMemPtr); + void padConstantZero(const MemoryPtr& srcMemPtr, const MemoryPtr& dstMemPtr); + void padEdge(const MemoryPtr& srcMemPtr, const MemoryPtr& dstMemPtr); + void padReflectOrSymmetric(const MemoryPtr& srcMemPtr, const MemoryPtr& dstMemPtr, const bool isSymmetric = false); void paramsInitialization(const PadAttrs& attrs, const std::vector& srcMemory, const std::vector& dstMemory); diff --git a/src/plugins/intel_cpu/src/nodes/pooling.cpp b/src/plugins/intel_cpu/src/nodes/pooling.cpp index 97958bb3de6dec..cbd8b9c2b0057e 100644 --- a/src/plugins/intel_cpu/src/nodes/pooling.cpp +++ b/src/plugins/intel_cpu/src/nodes/pooling.cpp @@ -387,8 +387,8 @@ void Pooling::prepareParams() { } if (useACL) { - auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); - auto& srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); + auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); + auto srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); if (!dstMemPtr || !dstMemPtr->isAllocated()) IE_THROW() << "Destination memory didn't allocate."; if (!srcMemPtr || !srcMemPtr->isAllocated()) diff --git a/src/plugins/intel_cpu/src/nodes/priorbox.cpp b/src/plugins/intel_cpu/src/nodes/priorbox.cpp index 963d39cfe98c9d..7d58745f014f29 100644 --- a/src/plugins/intel_cpu/src/nodes/priorbox.cpp +++ b/src/plugins/intel_cpu/src/nodes/priorbox.cpp @@ -152,7 +152,7 @@ 
PriorBox::PriorBox(const std::shared_ptr& op, const GraphContext:: } bool PriorBox::needShapeInfer() const { - auto& memory = getChildEdgeAt(0)->getMemoryPtr(); + auto memory = getChildEdgeAt(0)->getMemoryPtr(); if (memory->GetShape().isDynamic()) { return true; } diff --git a/src/plugins/intel_cpu/src/nodes/priorbox_clustered.cpp b/src/plugins/intel_cpu/src/nodes/priorbox_clustered.cpp index bcd24b2f23e602..0e4ef85117e5ac 100644 --- a/src/plugins/intel_cpu/src/nodes/priorbox_clustered.cpp +++ b/src/plugins/intel_cpu/src/nodes/priorbox_clustered.cpp @@ -106,7 +106,7 @@ PriorBoxClustered::PriorBoxClustered(const std::shared_ptr& op, co } bool PriorBoxClustered::needShapeInfer() const { - auto& memory = getChildEdgeAt(0)->getMemoryPtr(); + auto memory = getChildEdgeAt(0)->getMemoryPtr(); if (memory->GetShape().isDynamic()) { return true; } diff --git a/src/plugins/intel_cpu/src/nodes/reduce.cpp b/src/plugins/intel_cpu/src/nodes/reduce.cpp index c6c0525941398e..589a5ec19f9d5b 100644 --- a/src/plugins/intel_cpu/src/nodes/reduce.cpp +++ b/src/plugins/intel_cpu/src/nodes/reduce.cpp @@ -1944,7 +1944,7 @@ void Reduce::prepareParams() { reduce_axes = raw_axes; } - auto &dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); + auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); const SizeVector &dst_dims = dstMemPtr->getDesc().getShape().getDims(); dst_size = dstMemPtr->GetSize(); calc_process_dst_dims(reduce_axes, dst_dims); @@ -1992,8 +1992,8 @@ void Reduce::createPrimitive() { if (!isExecutable()) { return; } - auto &dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); - auto &srcMemPtr = getParentEdgeAt(REDUCE_DATA)->getMemoryPtr(); + auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); + auto srcMemPtr = getParentEdgeAt(REDUCE_DATA)->getMemoryPtr(); if (!dstMemPtr || !dstMemPtr->isAllocated()) IE_THROW() << errorPrefix << " has not allocated destination memory."; if (!srcMemPtr || !srcMemPtr->isAllocated()) @@ -2077,8 +2077,8 @@ void Reduce::executeDynamicImpl(dnnl::stream strm) { } void Reduce::execute(dnnl::stream strm) { - auto &dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); - auto &srcMemPtr = getParentEdgeAt(REDUCE_DATA)->getMemoryPtr(); + auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); + auto srcMemPtr = getParentEdgeAt(REDUCE_DATA)->getMemoryPtr(); const uint8_t *src_data = reinterpret_cast(srcMemPtr->GetPtr()); uint8_t *dst_data = reinterpret_cast(dstMemPtr->GetPtr()); @@ -2124,7 +2124,7 @@ void Reduce::reduce_type(const uint8_t *in_ptr, uint8_t *out_ptr, size_t dst_siz if (is_hybrid_layout) { uint8_t *proc_ptr = out_ptr; - auto &dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); + auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); out_ptr = reinterpret_cast(dstMemPtr->GetPtr()); if (layout == ReduceLayoutType::reduce_nspc) { nspc2ncsp(proc_ptr, out_ptr); diff --git a/src/plugins/intel_cpu/src/nodes/reorder.cpp b/src/plugins/intel_cpu/src/nodes/reorder.cpp index 6e3bc53c02c787..cc0a2d884b9b78 100644 --- a/src/plugins/intel_cpu/src/nodes/reorder.cpp +++ b/src/plugins/intel_cpu/src/nodes/reorder.cpp @@ -120,8 +120,8 @@ void Reorder::prepareParams() { if (isOptimized) return; - auto &srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); - auto &dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); + auto srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); + auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); if (!dstMemPtr || !dstMemPtr->isAllocated()) IE_THROW() << "Destination memory didn't allocate."; if (!srcMemPtr || !srcMemPtr->isAllocated()) diff --git a/src/plugins/intel_cpu/src/nodes/reshape.cpp 
b/src/plugins/intel_cpu/src/nodes/reshape.cpp index bc2501102482cd..158d594bfd9233 100644 --- a/src/plugins/intel_cpu/src/nodes/reshape.cpp +++ b/src/plugins/intel_cpu/src/nodes/reshape.cpp @@ -322,8 +322,8 @@ void Reshape::executeDynamicImpl(dnnl::stream strm) { } void Reshape::execute(dnnl::stream strm) { - auto& srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); - auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); + auto srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); + auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); auto srcPtr = static_cast(srcMemPtr->GetPtr()); auto dstPtr = static_cast(dstMemPtr->GetPtr()); diff --git a/src/plugins/intel_cpu/src/nodes/reverse_sequence.cpp b/src/plugins/intel_cpu/src/nodes/reverse_sequence.cpp index 6606ab7505872d..b17841865be2d7 100644 --- a/src/plugins/intel_cpu/src/nodes/reverse_sequence.cpp +++ b/src/plugins/intel_cpu/src/nodes/reverse_sequence.cpp @@ -127,7 +127,7 @@ ReverseSequence::ReverseSequenceExecutor::ReverseSequenceExecutor(const VectorDi } template -void ReverseSequence::ReverseSequenceExecutor::exec(const MemoryPtr& dataMemPtr, const MemoryPtr& seqLengthsMemPtr, MemoryPtr& dstMemPtr) { +void ReverseSequence::ReverseSequenceExecutor::exec(const MemoryPtr& dataMemPtr, const MemoryPtr& seqLengthsMemPtr, const MemoryPtr& dstMemPtr) { const VectorDims& srcDims = dataMemPtr->getStaticDims(); const auto *srcData = reinterpret_cast(dataMemPtr->GetPtr()); auto *dstData = reinterpret_cast(dstMemPtr->GetPtr()); diff --git a/src/plugins/intel_cpu/src/nodes/reverse_sequence.h b/src/plugins/intel_cpu/src/nodes/reverse_sequence.h index f1e7d67a999616..55fd457c0a688e 100644 --- a/src/plugins/intel_cpu/src/nodes/reverse_sequence.h +++ b/src/plugins/intel_cpu/src/nodes/reverse_sequence.h @@ -34,7 +34,7 @@ class ReverseSequence : public Node { ~ReverseSequenceExecutor() = default; template - void exec(const MemoryPtr& dataMemPtr, const MemoryPtr& seqLengthsMemPtr, MemoryPtr& dstMemPtr); + void exec(const MemoryPtr& dataMemPtr, const MemoryPtr& seqLengthsMemPtr, const MemoryPtr& dstMemPtr); private: const int batchAxis; diff --git a/src/plugins/intel_cpu/src/nodes/roi_align.cpp b/src/plugins/intel_cpu/src/nodes/roi_align.cpp index 4e2c4ab246ca42..33236a7815a89a 100644 --- a/src/plugins/intel_cpu/src/nodes/roi_align.cpp +++ b/src/plugins/intel_cpu/src/nodes/roi_align.cpp @@ -814,8 +814,8 @@ void ROIAlign::initSupportedPrimitiveDescriptors() { } void ROIAlign::createPrimitive() { - auto& srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); - auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); + auto srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); + auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); if (!srcMemPtr || !srcMemPtr->isAllocated()) IE_THROW() << errorPrefix << " did not allocate input memory"; if (!dstMemPtr || !dstMemPtr->isAllocated()) diff --git a/src/plugins/intel_cpu/src/nodes/roll.cpp b/src/plugins/intel_cpu/src/nodes/roll.cpp index f06e4c84d48cf0..4e6f2c8d051d52 100644 --- a/src/plugins/intel_cpu/src/nodes/roll.cpp +++ b/src/plugins/intel_cpu/src/nodes/roll.cpp @@ -177,7 +177,7 @@ Roll::RollExecutor::RollExecutor(const VectorDims& dataDims, const VectorDims& s template void Roll::RollExecutor::exec(const MemoryPtr& dataMemPtr, const MemoryPtr& shiftMemPtr, const MemoryPtr& axesMemPtr, - MemoryPtr& dstMemPtr) { + const MemoryPtr& dstMemPtr) { const auto *data = reinterpret_cast(dataMemPtr->GetPtr()); const auto *shift = reinterpret_cast(shiftMemPtr->GetPtr()); const auto *axes = reinterpret_cast(axesMemPtr->GetPtr()); diff --git 
a/src/plugins/intel_cpu/src/nodes/roll.h b/src/plugins/intel_cpu/src/nodes/roll.h index 9e6fd6d508e426..dcaf32c144eecd 100644 --- a/src/plugins/intel_cpu/src/nodes/roll.h +++ b/src/plugins/intel_cpu/src/nodes/roll.h @@ -34,7 +34,7 @@ class Roll : public Node { template void exec(const MemoryPtr& dataMemPtr, const MemoryPtr& shiftMemPtr, const MemoryPtr& axesMemPtr, - MemoryPtr& dstMemPtr); + const MemoryPtr& dstMemPtr); private: const size_t numOfDims; diff --git a/src/plugins/intel_cpu/src/nodes/scatter_update.cpp b/src/plugins/intel_cpu/src/nodes/scatter_update.cpp index 7340bec357d296..6198c21eee717f 100644 --- a/src/plugins/intel_cpu/src/nodes/scatter_update.cpp +++ b/src/plugins/intel_cpu/src/nodes/scatter_update.cpp @@ -254,10 +254,10 @@ static std::vector getBlockND(const VectorDims& shape) { } void ScatterUpdate::execute(dnnl::stream strm) { - auto &srcMemPtr = getParentEdgeAt(DATA_ID)->getMemoryPtr(); - auto &dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); - auto &indicesMemPtr = getParentEdgeAt(INDICES_ID)->getMemoryPtr(); - auto &updateMemPtr = getParentEdgeAt(UPDATE_ID)->getMemoryPtr(); + auto srcMemPtr = getParentEdgeAt(DATA_ID)->getMemoryPtr(); + auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); + auto indicesMemPtr = getParentEdgeAt(INDICES_ID)->getMemoryPtr(); + auto updateMemPtr = getParentEdgeAt(UPDATE_ID)->getMemoryPtr(); uint8_t *dstPtr = reinterpret_cast(dstMemPtr->GetPtr()); uint8_t *srcPtr = reinterpret_cast(srcMemPtr->GetPtr()); @@ -269,7 +269,7 @@ void ScatterUpdate::execute(dnnl::stream strm) { size_t srcRank = srcDataDim.size(); int axis = 0; if (axisRelaxed) { - auto &axisMemPtr = getParentEdgeAt(AXIS_ID)->getMemoryPtr(); + auto axisMemPtr = getParentEdgeAt(AXIS_ID)->getMemoryPtr(); uint8_t *axisPtr = reinterpret_cast(axisMemPtr->GetPtr()); if (axisSize == 4) { auto *axisPtr32 = reinterpret_cast(axisPtr); diff --git a/src/plugins/intel_cpu/src/nodes/shuffle_channels.cpp b/src/plugins/intel_cpu/src/nodes/shuffle_channels.cpp index ac02f1c8175321..ae01c05a16ccef 100644 --- a/src/plugins/intel_cpu/src/nodes/shuffle_channels.cpp +++ b/src/plugins/intel_cpu/src/nodes/shuffle_channels.cpp @@ -125,8 +125,8 @@ void ShuffleChannels::initSupportedPrimitiveDescriptors() { } void ShuffleChannels::createPrimitive() { - auto &dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); - auto &srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); + auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); + auto srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); if (!dstMemPtr || !dstMemPtr->isAllocated()) THROW_SHCH_ERROR << "has not allocated destination memory"; if (!srcMemPtr || !srcMemPtr->isAllocated()) @@ -149,7 +149,7 @@ void ShuffleChannels::createPrimitive() { } void ShuffleChannels::prepareParams() { - auto& srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); + auto srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); auto builder = [](const ShuffleChannelsAttributes& key) -> std::shared_ptr { return std::make_shared(key); }; diff --git a/src/plugins/intel_cpu/src/nodes/space_to_depth.cpp b/src/plugins/intel_cpu/src/nodes/space_to_depth.cpp index af22ab4b510e1c..f97ef8bf06c59d 100644 --- a/src/plugins/intel_cpu/src/nodes/space_to_depth.cpp +++ b/src/plugins/intel_cpu/src/nodes/space_to_depth.cpp @@ -164,8 +164,8 @@ void SpaceToDepth::initSupportedPrimitiveDescriptors() { } void SpaceToDepth::createPrimitive() { - auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); - auto& srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); + auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); + auto srcMemPtr = 
getParentEdgeAt(0)->getMemoryPtr(); if (!dstMemPtr || !dstMemPtr->isAllocated()) THROW_ERROR << "has not allocated destination memory"; if (!srcMemPtr || !srcMemPtr->isAllocated()) diff --git a/src/plugins/intel_cpu/src/nodes/split.cpp b/src/plugins/intel_cpu/src/nodes/split.cpp index d23f8ebaba7359..bb26d696133180 100644 --- a/src/plugins/intel_cpu/src/nodes/split.cpp +++ b/src/plugins/intel_cpu/src/nodes/split.cpp @@ -543,10 +543,10 @@ void Split::resolveInPlaceEdges(Edge::LOOK look) { // getName() << " with type " << getTypeStr(); auto memMngr = std::make_shared(baseMemMngr, baseDim, offset, partDim); - childEdge->getMemoryPtr().reset(new Memory(getEngine())); - childEdge->getMemoryPtr()->Create(selected_pd->getConfig().outConfs[i].getMemDesc(), memMngr); + auto newMem = std::make_shared(getEngine()); + newMem->Create(selected_pd->getConfig().outConfs[i].getMemDesc(), memMngr); - childEdge->changeStatus(Edge::Status::Allocated); + childEdge->resetMemoryPtr(newMem); } offset += partDim; } diff --git a/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp b/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp index b3af29c3c01255..f57128a2443cab 100644 --- a/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp +++ b/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp @@ -634,7 +634,7 @@ void TensorIterator::executeDynamicImpl(dnnl::stream strm) { void TensorIterator::prepareInputPorts() { const auto &eng = getEngine(); for (auto map_rule : inputPortMap) { - auto &from_mem = getParentEdgesAtPort(map_rule.from)[0]->getMemoryPtr(); + auto from_mem = getParentEdgesAtPort(map_rule.from)[0]->getMemoryPtr(); auto &to_mem = input_mems[map_rule.to].front(); // first memory is enough to access the shared underlying physical memory if (map_rule.axis == -1) @@ -648,7 +648,7 @@ void TensorIterator::prepareInputPorts() { void TensorIterator::prepareOutputPorts() { const auto &eng = getEngine(); for (auto map_rule : outputPortMap) { - auto &to_mem = getChildEdgesAtPort(map_rule.from)[0]->getMemoryPtr(); + auto to_mem = getChildEdgesAtPort(map_rule.from)[0]->getMemoryPtr(); auto &from_mem = output_mem[map_rule.to]; if (map_rule.axis == -1) diff --git a/src/plugins/intel_cpu/src/nodes/topk.cpp b/src/plugins/intel_cpu/src/nodes/topk.cpp index d3b14e6f458ac0..939792f1bc2f02 100644 --- a/src/plugins/intel_cpu/src/nodes/topk.cpp +++ b/src/plugins/intel_cpu/src/nodes/topk.cpp @@ -1981,8 +1981,8 @@ void TopK::preset_params() { } void TopK::prepareParams() { - auto &dstMemPtr = getChildEdgeAt(TOPK_DATA)->getMemoryPtr(); - auto &srcMemPtr = getParentEdgeAt(TOPK_DATA)->getMemoryPtr(); + auto dstMemPtr = getChildEdgeAt(TOPK_DATA)->getMemoryPtr(); + auto srcMemPtr = getParentEdgeAt(TOPK_DATA)->getMemoryPtr(); if (!dstMemPtr || !dstMemPtr->isAllocated()) IE_THROW() << errorPrefix << " has not allocated destination memory."; if (!srcMemPtr || !srcMemPtr->isAllocated()) @@ -2064,7 +2064,7 @@ void TopK::prepareParams() { } void TopK::createPrimitive() { - auto &srcMemPtr = getParentEdgeAt(TOPK_DATA)->getMemoryPtr(); + auto srcMemPtr = getParentEdgeAt(TOPK_DATA)->getMemoryPtr(); if (srcMemPtr->getDesc().hasLayoutType(LayoutType::ncsp)) { layout = TopKLayoutType::topk_ncsp; } else if (srcMemPtr->getDesc().hasLayoutType(LayoutType::nspc)) { @@ -2137,9 +2137,9 @@ void TopK::executeDynamicImpl(dnnl::stream strm) { } void TopK::execute(dnnl::stream strm) { - auto &srcMemPtr = getParentEdgeAt(TOPK_DATA)->getMemoryPtr(); - auto &dstMemPtr = getChildEdgeAt(TOPK_DATA)->getMemoryPtr(); - auto &dstIndexesMemPtr = 
getChildEdgeAt(TOPK_INDEX)->getMemoryPtr(); + auto srcMemPtr = getParentEdgeAt(TOPK_DATA)->getMemoryPtr(); + auto dstMemPtr = getChildEdgeAt(TOPK_DATA)->getMemoryPtr(); + auto dstIndexesMemPtr = getChildEdgeAt(TOPK_INDEX)->getMemoryPtr(); const uint8_t *src_data = reinterpret_cast(srcMemPtr->GetPtr()); uint8_t *dst_data = reinterpret_cast(dstMemPtr->GetPtr()); diff --git a/src/plugins/intel_cpu/src/nodes/transpose.cpp b/src/plugins/intel_cpu/src/nodes/transpose.cpp index 0217a4bb772e78..dd87e98e7d4f7f 100644 --- a/src/plugins/intel_cpu/src/nodes/transpose.cpp +++ b/src/plugins/intel_cpu/src/nodes/transpose.cpp @@ -187,8 +187,8 @@ bool Transpose::needPrepareParams() const { void Transpose::prepareParams() { if (performAsReorder) { // Transpose(order={0,3,1,2}) can be performed as Reorder(acdb=>abcd) - auto& srcMemPtr = getParentEdgeAt(INPUT_DATA_IDX)->getMemoryPtr(); - auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); + auto srcMemPtr = getParentEdgeAt(INPUT_DATA_IDX)->getMemoryPtr(); + auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); auto dstDesc = dstMemPtr->GetDescWithType()->getDnnlDesc(); auto srcDesc = dnnl::memory::desc(dstDesc.get_dims(), dstDesc.get_data_type(), memory::format_tag::acdb); auto result = getReorderPrim(context->getParamsCache(), getEngine(), srcDesc, dstDesc); @@ -237,8 +237,8 @@ void Transpose::prepareParams() { } void Transpose::createPrimitive() { - auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); - auto& srcMemPtr = getParentEdgeAt(INPUT_DATA_IDX)->getMemoryPtr(); + auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); + auto srcMemPtr = getParentEdgeAt(INPUT_DATA_IDX)->getMemoryPtr(); if (!dstMemPtr || !dstMemPtr->isAllocated()) IE_THROW() << "Destination memory was not allocated."; if (!srcMemPtr || !srcMemPtr->isAllocated()) @@ -378,8 +378,8 @@ void Transpose::execute(dnnl::stream strm) { if (prim) { prim.execute(strm, primArgs); } else if (execPtr) { - auto &dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); - auto &srcMemPtr = getParentEdgeAt(INPUT_DATA_IDX)->getMemoryPtr(); + auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); + auto srcMemPtr = getParentEdgeAt(INPUT_DATA_IDX)->getMemoryPtr(); int MB = srcMemPtr->getStaticDims()[0]; diff --git a/src/plugins/intel_cpu/src/nodes/unique.cpp b/src/plugins/intel_cpu/src/nodes/unique.cpp index 3687888fd9ff04..7cf6853f3e4565 100644 --- a/src/plugins/intel_cpu/src/nodes/unique.cpp +++ b/src/plugins/intel_cpu/src/nodes/unique.cpp @@ -88,13 +88,13 @@ void Unique::createPrimitive() { } void Unique::prepareParams() { - auto& dataMemPtr = getParentEdgeAt(IN_DATA)->getMemoryPtr(); + auto dataMemPtr = getParentEdgeAt(IN_DATA)->getMemoryPtr(); if (!dataMemPtr || !dataMemPtr->isAllocated()) { THROW_ERROR << " has not allocated input data memory."; } for (int i = 0; i < 4; i++) { if (definedOutputs[i]) { - auto& dstMemPtr = getChildEdgeAt(i)->getMemoryPtr(); + auto dstMemPtr = getChildEdgeAt(i)->getMemoryPtr(); if (!dstMemPtr || !dstMemPtr->isAllocated()) { THROW_ERROR << " has not allocated output memory at port " << i; } diff --git a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/concat_reshape_concat.cpp b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/concat_reshape_concat.cpp index 68f67597da8703..91fa8ff0fb942b 100644 --- a/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/concat_reshape_concat.cpp +++ b/src/plugins/intel_cpu/tests/functional/subgraph_tests/src/concat_reshape_concat.cpp @@ -106,7 +106,7 @@ class ConcatReshapeConcatSubgraphTest : public 
testing::WithParamInterface(concat3, softmax_axis); ngraph::ResultVector results; - for (int i = 0; i < soft_max->get_output_size(); i++) + for (size_t i = 0; i < soft_max->get_output_size(); i++) results.push_back(std::make_shared(soft_max->output(i))); function = std::make_shared(results, input_params, "ConcatReshapeConcatPattern"); From 4a87f2e310c20b2afc638e1f522b8d5018c43567 Mon Sep 17 00:00:00 2001 From: jialipen Date: Thu, 1 Jun 2023 23:47:20 +0800 Subject: [PATCH 13/28] extract IMemory interface and implements Memory class. --- src/plugins/intel_cpu/src/cpu_memory.cpp | 61 +++++----- src/plugins/intel_cpu/src/cpu_memory.h | 115 +++++++++++------- .../intel_cpu/src/dnnl_postops_composer.cpp | 18 +-- src/plugins/intel_cpu/src/dnnl_scratch_pad.h | 4 +- src/plugins/intel_cpu/src/edge.cpp | 38 ++++-- src/plugins/intel_cpu/src/edge.h | 4 +- src/plugins/intel_cpu/src/graph.cpp | 14 +-- src/plugins/intel_cpu/src/graph_optimizer.cpp | 6 +- src/plugins/intel_cpu/src/infer_request.cpp | 4 +- .../src/memory_desc/cpu_memory_desc_utils.cpp | 4 +- .../src/memory_desc/cpu_memory_desc_utils.h | 6 +- src/plugins/intel_cpu/src/node.cpp | 21 ++-- .../intel_cpu/src/nodes/adaptive_pooling.cpp | 12 +- .../intel_cpu/src/nodes/batch_to_space.cpp | 8 +- src/plugins/intel_cpu/src/nodes/bin_conv.cpp | 6 +- src/plugins/intel_cpu/src/nodes/broadcast.cpp | 12 +- src/plugins/intel_cpu/src/nodes/bucketize.cpp | 6 +- .../intel_cpu/src/nodes/color_convert.cpp | 4 +- .../src/nodes/common/tile_broadcast_utils.cpp | 6 +- src/plugins/intel_cpu/src/nodes/concat.cpp | 15 ++- src/plugins/intel_cpu/src/nodes/conv.cpp | 16 +-- src/plugins/intel_cpu/src/nodes/convert.cpp | 4 +- .../src/nodes/ctc_greedy_decoder.cpp | 6 +- .../src/nodes/ctc_greedy_decoder_seq_len.cpp | 10 +- src/plugins/intel_cpu/src/nodes/ctc_loss.cpp | 12 +- src/plugins/intel_cpu/src/nodes/cum_sum.cpp | 10 +- src/plugins/intel_cpu/src/nodes/cum_sum.h | 2 +- src/plugins/intel_cpu/src/nodes/deconv.cpp | 7 +- src/plugins/intel_cpu/src/nodes/def_conv.cpp | 10 +- .../intel_cpu/src/nodes/depth_to_space.cpp | 4 +- .../intel_cpu/src/nodes/detection_output.cpp | 12 +- src/plugins/intel_cpu/src/nodes/dft.cpp | 6 +- src/plugins/intel_cpu/src/nodes/eltwise.cpp | 4 +- .../src/nodes/embedding_bag_offset_sum.cpp | 10 +- .../src/nodes/embedding_bag_packed_sum.cpp | 6 +- .../intel_cpu/src/nodes/embedding_bag_sum.cpp | 2 +- .../src/nodes/embedding_segments_sum.cpp | 12 +- .../src/nodes/executors/acl/acl_eltwise.cpp | 4 +- .../nodes/executors/acl/acl_interpolate.cpp | 2 +- .../src/nodes/executors/acl/acl_mvn.cpp | 4 +- .../src/nodes/executors/acl/acl_pooling.cpp | 6 +- .../src/nodes/executors/acl/acl_reduce.cpp | 4 +- ...xperimental_detectron_detection_output.cpp | 16 +-- ...ectron_generate_proposals_single_image.cpp | 12 +- ...erimental_detectron_priorgridgenerator.cpp | 4 +- ...rimental_detectron_roifeatureextractor.cpp | 8 +- .../nodes/experimental_detectron_topkrois.cpp | 6 +- .../src/nodes/extract_image_patches.cpp | 4 +- src/plugins/intel_cpu/src/nodes/eye.cpp | 2 +- src/plugins/intel_cpu/src/nodes/eye.h | 8 +- .../intel_cpu/src/nodes/fake_quantize.cpp | 23 ++-- .../intel_cpu/src/nodes/fullyconnected.cpp | 2 +- src/plugins/intel_cpu/src/nodes/gather.cpp | 27 ++-- .../intel_cpu/src/nodes/gather_elements.cpp | 6 +- src/plugins/intel_cpu/src/nodes/gather_nd.cpp | 12 +- .../intel_cpu/src/nodes/gather_tree.cpp | 10 +- .../src/nodes/generate_proposals.cpp | 14 +-- .../intel_cpu/src/nodes/grid_sample.cpp | 6 +- src/plugins/intel_cpu/src/nodes/grn.cpp | 4 +- 
src/plugins/intel_cpu/src/nodes/if.cpp | 4 +- src/plugins/intel_cpu/src/nodes/input.cpp | 20 ++- .../intel_cpu/src/nodes/interaction.cpp | 15 ++- .../intel_cpu/src/nodes/interpolate.cpp | 12 +- .../intel_cpu/src/nodes/log_softmax.cpp | 4 +- .../intel_cpu/src/nodes/mathematics.cpp | 4 +- .../intel_cpu/src/nodes/matrix_nms.cpp | 10 +- src/plugins/intel_cpu/src/nodes/memory.cpp | 12 +- src/plugins/intel_cpu/src/nodes/memory.hpp | 2 +- src/plugins/intel_cpu/src/nodes/mha.cpp | 10 +- .../intel_cpu/src/nodes/multiclass_nms.cpp | 12 +- src/plugins/intel_cpu/src/nodes/mvn.cpp | 4 +- src/plugins/intel_cpu/src/nodes/ngram.cpp | 6 +- .../src/nodes/non_max_suppression.cpp | 22 ++-- src/plugins/intel_cpu/src/nodes/non_zero.cpp | 4 +- src/plugins/intel_cpu/src/nodes/normalize.cpp | 4 +- src/plugins/intel_cpu/src/nodes/one_hot.cpp | 12 +- src/plugins/intel_cpu/src/nodes/pad.cpp | 20 +-- src/plugins/intel_cpu/src/nodes/priorbox.cpp | 10 +- .../src/nodes/priorbox_clustered.cpp | 10 +- src/plugins/intel_cpu/src/nodes/proposal.cpp | 10 +- .../intel_cpu/src/nodes/psroi_pooling.cpp | 8 +- src/plugins/intel_cpu/src/nodes/range.cpp | 8 +- src/plugins/intel_cpu/src/nodes/rdft.cpp | 12 +- src/plugins/intel_cpu/src/nodes/reduce.cpp | 6 +- src/plugins/intel_cpu/src/nodes/reference.cpp | 4 +- .../intel_cpu/src/nodes/region_yolo.cpp | 4 +- src/plugins/intel_cpu/src/nodes/reorder.cpp | 33 +++-- src/plugins/intel_cpu/src/nodes/reorder.h | 2 +- .../intel_cpu/src/nodes/reorg_yolo.cpp | 4 +- src/plugins/intel_cpu/src/nodes/reshape.cpp | 12 +- .../intel_cpu/src/nodes/reverse_sequence.cpp | 6 +- src/plugins/intel_cpu/src/nodes/rnn.cpp | 6 +- src/plugins/intel_cpu/src/nodes/roi_align.cpp | 8 +- .../intel_cpu/src/nodes/roi_pooling.cpp | 24 ++-- src/plugins/intel_cpu/src/nodes/roi_pooling.h | 6 +- src/plugins/intel_cpu/src/nodes/roll.cpp | 8 +- .../intel_cpu/src/nodes/scatter_update.cpp | 10 +- src/plugins/intel_cpu/src/nodes/shapeof.cpp | 2 +- .../intel_cpu/src/nodes/shuffle_channels.cpp | 4 +- .../intel_cpu/src/nodes/space_to_batch.cpp | 8 +- .../intel_cpu/src/nodes/space_to_depth.cpp | 4 +- src/plugins/intel_cpu/src/nodes/split.cpp | 13 +- .../intel_cpu/src/nodes/strided_slice.cpp | 12 +- .../intel_cpu/src/nodes/tensoriterator.cpp | 13 +- src/plugins/intel_cpu/src/nodes/tile.cpp | 8 +- src/plugins/intel_cpu/src/nodes/topk.cpp | 14 +-- src/plugins/intel_cpu/src/nodes/transpose.cpp | 18 +-- src/plugins/intel_cpu/src/nodes/unique.cpp | 20 +-- src/plugins/intel_cpu/src/utils/blob_dump.cpp | 2 +- src/plugins/intel_cpu/src/utils/blob_dump.h | 5 +- .../intel_cpu/src/utils/node_dumper.cpp | 3 +- .../shape_inference_ngraph.cpp | 2 +- src/plugins/intel_cpu/src/weights_cache.hpp | 2 +- 113 files changed, 599 insertions(+), 580 deletions(-) diff --git a/src/plugins/intel_cpu/src/cpu_memory.cpp b/src/plugins/intel_cpu/src/cpu_memory.cpp index 87fad83cf1b90b..f8c3cde89e3f98 100644 --- a/src/plugins/intel_cpu/src/cpu_memory.cpp +++ b/src/plugins/intel_cpu/src/cpu_memory.cpp @@ -34,10 +34,15 @@ namespace { } } // namespace -Memory::Memory(const dnnl::engine& eng) : - eng(eng), mgrHandle(std::make_shared(std::unique_ptr(new MemoryMngrWithReuse())), this), dnnlMemHandle(this) {} -Memory::Memory(const dnnl::engine& eng, std::unique_ptr mngr) : - eng(eng), mgrHandle(std::make_shared(std::move(mngr)), this), dnnlMemHandle(this) {} +Memory::Memory(const dnnl::engine& eng, MemoryDescPtr _pMemDesc, const void* data, bool pads_zeroing) : + eng(eng), mgrHandle(std::make_shared(std::unique_ptr(new MemoryMngrWithReuse())), this), dnnlMemHandle(this), 
IMemory(_pMemDesc) {Create(pMemDesc, data, pads_zeroing);} +Memory::Memory(const dnnl::engine& eng, const MemoryDesc& _MemDesc, const void* data, bool pads_zeroing) : + eng(eng), mgrHandle(std::make_shared(std::unique_ptr(new MemoryMngrWithReuse())), this), dnnlMemHandle(this), IMemory(_MemDesc.clone()) {Create(pMemDesc, data, pads_zeroing);} + +Memory::Memory(const dnnl::engine& eng, std::unique_ptr mngr, MemoryDescPtr _pMemDesc) : + eng(eng), mgrHandle(std::make_shared(std::move(mngr)), this), dnnlMemHandle(this), IMemory(_pMemDesc) { Create(_pMemDesc, mgrHandle.get());} +Memory::Memory(const dnnl::engine& eng, std::unique_ptr mngr, const MemoryDesc& _MemDesc) : + eng(eng), mgrHandle(std::make_shared(std::move(mngr)), this), dnnlMemHandle(this), IMemory(_MemDesc.clone()) { Create(pMemDesc, mgrHandle.get());} size_t Memory::GetSize() const { auto size = getDesc().getCurrentMemSize(); @@ -68,7 +73,7 @@ void Memory::Create(MemoryDescPtr desc, const void* data, bool pads_zeroing) { } } -void Memory::SetData(const Memory& src, bool ftz) const { +void Memory::SetData(const IMemory& src, bool ftz) const { node::Reorder::reorderData(src, *this); auto localPrim = GetPrimitive(); @@ -94,39 +99,39 @@ void Memory::FillZero() { memset(dataPtr, 0, getDesc().getCurrentMemSize()); } -void *Memory::GetPtr() const { - auto ptr = static_cast(GetData()); - ptr += pMemDesc->getOffsetPadding() * pMemDesc->getPrecision().size(); - return ptr; -} +// void *Memory::GetPtr() const { +// auto ptr = static_cast(GetData()); +// ptr += pMemDesc->getOffsetPadding() * pMemDesc->getPrecision().size(); +// return ptr; +// } -void Memory::redefineDesc(MemoryDescPtr desc) { +void Memory::redefineDesc(MemoryDescPtr desc, const void* data, bool pads_zeroing) { if (!desc->hasDefinedMaxSize()) { IE_THROW() << "Can not reset descriptor, memory upper bound is unknown."; } - this->Create(desc, nullptr, false); + this->Create(desc, data, pads_zeroing); // nullptr, false } template<> -DnnlMemoryDescPtr Memory::GetDescWithType() const { +DnnlMemoryDescPtr IMemory::GetDescWithType() const { return MemoryDescUtils::convertToDnnlMemoryDesc(pMemDesc); } -void Memory::setDataHandle(void *data) { - if (!mgrHandle->hasExtBuffer()) { - mgrHandle = DnnlMemMngrHandle( - std::make_shared(std::unique_ptr(new MemoryMngrWithReuse())), - this); - } - - size_t maxMemSize = pMemDesc->isDefined() ? pMemDesc->getCurrentMemSize() : 0; - mgrHandle->setExtBuff(data, maxMemSize); - if (dnnlMemHandle.isInit()) { - auto prim = dnnlMemHandle.getPrim(); - prim.set_data_handle(mgrHandle->getRawPtr()); // for pads zeroing, to preserve dnnl::memory::set_data_handle behaviour - } -} +// void Memory::setDataHandle(void *data) { +// if (!mgrHandle->hasExtBuffer()) { +// mgrHandle = DnnlMemMngrHandle( +// std::make_shared(std::unique_ptr(new MemoryMngrWithReuse())), +// this); +// } + +// size_t maxMemSize = pMemDesc->isDefined() ? 
pMemDesc->getCurrentMemSize() : 0; +// mgrHandle->setExtBuff(data, maxMemSize); +// if (dnnlMemHandle.isInit()) { +// auto prim = dnnlMemHandle.getPrim(); +// prim.set_data_handle(mgrHandle->getRawPtr()); // for pads zeroing, to preserve dnnl::memory::set_data_handle behaviour +// } +// } void Memory::update() { if (dnnlMemHandle.isInit()) { @@ -147,7 +152,7 @@ void Memory::Create(MemoryDescPtr desc, MemoryMngrPtr memMgr) { } template<> -BlockedMemoryDescPtr Memory::GetDescWithType() const { +BlockedMemoryDescPtr IMemory::GetDescWithType() const { return MemoryDescUtils::convertToBlockedMemoryDesc(pMemDesc); } diff --git a/src/plugins/intel_cpu/src/cpu_memory.h b/src/plugins/intel_cpu/src/cpu_memory.h index ab6d9cbff1e530..872aa72078831b 100644 --- a/src/plugins/intel_cpu/src/cpu_memory.h +++ b/src/plugins/intel_cpu/src/cpu_memory.h @@ -159,10 +159,53 @@ class DnnlMemMngrHandle { Memory* _pMem = nullptr; }; -class Memory { +class IMemory { public: - explicit Memory(const dnnl::engine& eng); - Memory(const dnnl::engine& eng, std::unique_ptr mngr); + virtual dnnl::memory GetPrimitive() const = 0; // that might be a pain in the neck, but we still have to support it + virtual dnnl::memory::data_type GetDataType() const = 0; // still better than downcast + + virtual bool isAllocated() const noexcept = 0; + + virtual const MemoryDesc& getDesc() const = 0; + virtual MemoryDescPtr getDescPtr() const = 0; + + virtual void* GetData() const = 0; // pointer to the actual memory + + virtual size_t GetSize() const = 0; // in bytes + + virtual const Shape& GetShape() const = 0; + + // Redefines descriptor. The memory descriptor will be replaced with the new one. + // Memory will not be reallocated if the new tensor size is less or equal the upper bound. + // Caution!!! This action invalidates the previous data layout. The old data may become unreachable. + virtual void redefineDesc(MemoryDescPtr desc, const void* data = nullptr, bool pads_zeroing = false) = 0; + + virtual void SetData(const IMemory& memory, bool ftz = true) const = 0; + virtual void FillZero() = 0; + + virtual const VectorDims& getStaticDims() const = 0; + + virtual bool isUsedExternalStorage() const = 0; + + virtual MemoryMngrPtr getMemoryMngr() const = 0; // returns nullptr when has nothing to return + + template ::value && !std::is_reference::value, int>::type = 0, + typename std::enable_if::value, int>::type = 0> + std::shared_ptr GetDescWithType() const; // the only not pure method, since it exploits a static polymorphism. 
Should call getDesc and type cast internally + +protected: + IMemory() = delete; + IMemory(MemoryDescPtr _pMemDesc) : pMemDesc(_pMemDesc) {}; + MemoryDescPtr pMemDesc; +}; + +class Memory : public IMemory { +public: + explicit Memory(const dnnl::engine& eng, MemoryDescPtr pMemDesc, const void* data = nullptr, bool pads_zeroing = true); + explicit Memory(const dnnl::engine& eng, const MemoryDesc& MemDesc, const void* data = nullptr, bool pads_zeroing = true); + Memory(const dnnl::engine& eng, std::unique_ptr mngr, MemoryDescPtr pMemDesc); + Memory(const dnnl::engine& eng, std::unique_ptr mngr, const MemoryDesc& MemDesc); Memory(const Memory&) = delete; Memory& operator= (const Memory&) = delete; @@ -170,9 +213,9 @@ class Memory { Memory(Memory&&) = delete; Memory& operator= (Memory&&) = delete; - dnnl::memory GetPrimitive() const; + dnnl::memory GetPrimitive() const override; - bool isAllocated() const noexcept { + bool isAllocated() const noexcept override { if (mgrHandle->getRawPtr()) { return true; } @@ -188,29 +231,24 @@ class Memory { return false; } - /** - * @brief Resets the memory manager to a new one created with the provided raw memory - */ - void setDataHandle(void* data); - - const MemoryDesc& getDesc() const { + const MemoryDesc& getDesc() const override { return *pMemDesc; } - MemoryDescPtr getDescPtr() const { + MemoryDescPtr getDescPtr() const override { return pMemDesc; } - template ::value && !std::is_reference::value, int>::type = 0, - typename std::enable_if::value, int>::type = 0> - std::shared_ptr GetDescWithType() const; + // template ::value && !std::is_reference::value, int>::type = 0, + // typename std::enable_if::value, int>::type = 0> + // std::shared_ptr GetDescWithType() const; /** * Return handler of buffer. Real data may starts from some other offset * @return */ - void* GetData() const { + void* GetData() const override { void* data = getDataNoThrow(); if (data == nullptr && pMemDesc->getShape().isStatic() && @@ -219,38 +257,25 @@ class Memory { return data; } - /** - * Return raw pointer on first element - * Like a GetData() but offset is applied. - * @return - */ - void* GetPtr() const; - - dnnl::memory::data_type GetDataType() const { + dnnl::memory::data_type GetDataType() const override { return DnnlExtensionUtils::IEPrecisionToDataType(getDesc().getPrecision()); } - size_t GetSize() const; + size_t GetSize() const override; - const Shape& GetShape() const { + const Shape& GetShape() const override { return getDesc().getShape(); } - void Create(const MemoryDesc& desc, const void* data = nullptr, bool pads_zeroing = true); - void Create(MemoryDescPtr desc, const void* data = nullptr, bool pads_zeroing = true); - - void Create(const MemoryDesc& desc, MemoryMngrPtr memMgr); - void Create(MemoryDescPtr desc, MemoryMngrPtr memMgr); - // Redefines descriptor. The memory descriptor will be replaced with the new one. // Memory will not be reallocated if the new tensor size is less or equal the upper bound. // Caution!!! This action invalidates the previous data layout. The old data may become unreachable. 
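// A short usage sketch of the IMemory/Memory split introduced above. The angle-bracket
// template arguments were lost in extraction, so the exact type parameters below are
// assumptions inferred from these hunks rather than verbatim code:
//
//   // before this series: default-construct, then call the public Create()
//   //   Memory mem(engine);
//   //   mem.Create(memDesc, dataPtr, /*pads_zeroing=*/false);
//
//   // after: a single constructor call; SetData() now accepts any IMemory
//   Memory mem(engine, memDesc, dataPtr, /*pads_zeroing=*/false);
//   void* raw = mem.GetData();     // call sites migrate from the removed GetPtr() to GetData()
//   mem.redefineDesc(newDesc);     // may now also re-bind data and request pads zeroing
//
// Consumers are expected to hold the MemoryPtr alias (retargeted to
// std::shared_ptr of IMemory later in this header) rather than the concrete Memory class.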
- void redefineDesc(MemoryDescPtr desc); + void redefineDesc(MemoryDescPtr desc, const void* data = nullptr, bool pads_zeroing = false) override; - void SetData(const Memory& memory, bool ftz = true) const; - void FillZero(); + void SetData(const IMemory& memory, bool ftz = true) const override; + void FillZero() override; - const VectorDims& getStaticDims() const { + const VectorDims& getStaticDims() const override { return getDesc().getShape().getStaticDims(); } @@ -258,11 +283,11 @@ class Memory { return eng; } - bool isUsedExternalStorage() const { + bool isUsedExternalStorage() const override { return mgrHandle->hasExtBuffer(); } - MemoryMngrPtr getMemoryMngr() const { + MemoryMngrPtr getMemoryMngr() const override { return mgrHandle.get(); } @@ -272,8 +297,14 @@ class Memory { private: void update(); + void Create(const MemoryDesc& desc, const void* data = nullptr, bool pads_zeroing = true); + void Create(MemoryDescPtr desc, const void* data = nullptr, bool pads_zeroing = true); + + void Create(const MemoryDesc& desc, MemoryMngrPtr memMgr); + void Create(MemoryDescPtr desc, MemoryMngrPtr memMgr); + private: - MemoryDescPtr pMemDesc; + // MemoryDescPtr pMemDesc; dnnl::engine eng; DnnlMemMngrHandle mgrHandle; bool padsZeroing = true; @@ -297,8 +328,8 @@ class Memory { } }; -using MemoryPtr = std::shared_ptr; -using MemoryCPtr = std::shared_ptr; +using MemoryPtr = std::shared_ptr; +using MemoryCPtr = std::shared_ptr; } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/dnnl_postops_composer.cpp b/src/plugins/intel_cpu/src/dnnl_postops_composer.cpp index 0ad586e1bc863c..f544e7a93fcb8e 100644 --- a/src/plugins/intel_cpu/src/dnnl_postops_composer.cpp +++ b/src/plugins/intel_cpu/src/dnnl_postops_composer.cpp @@ -59,9 +59,9 @@ void DnnlPostOpsComposer::updateWeiScales() { attr.set_scales_mask(DNNL_ARG_WEIGHTS, wei_scale_mask); DnnlBlockedMemoryDesc memoryDesc(InferenceEngine::Precision::FP32, Shape({wei_scale_values.size()})); - auto mem = std::make_shared(engine); - mem->Create(memoryDesc); - memcpy(mem->GetPtr(), wei_scale_values.data(), wei_scale_values.size() * sizeof(float)); + auto mem = std::make_shared(engine, memoryDesc); + // mem->Create(memoryDesc); + memcpy(mem->GetData(), wei_scale_values.data(), wei_scale_values.size() * sizeof(float)); args[DNNL_ARG_ATTR_SCALES | DNNL_ARG_WEIGHTS] = mem; } @@ -73,9 +73,9 @@ void DnnlPostOpsComposer::updateDestScales() { attr.set_scales_mask(DNNL_ARG_DST, 0); DnnlBlockedMemoryDesc memoryDesc(InferenceEngine::Precision::FP32, Shape({1})); - auto mem = std::make_shared(engine); - mem->Create(memoryDesc); - memcpy(mem->GetPtr(), &dst_scale_val, sizeof(float)); + auto mem = std::make_shared(engine, memoryDesc); + // mem->Create(memoryDesc); + memcpy(mem->GetData(), &dst_scale_val, sizeof(float)); args[DNNL_ARG_ATTR_SCALES | DNNL_ARG_DST] = mem; } @@ -92,9 +92,9 @@ void DnnlPostOpsComposer::appendBinary(const dnnl::algorithm alg, const std::vec ops.append_binary(alg, memoryDesc.getDnnlDesc()); // copy the data as args - auto mem = std::make_shared(engine); - mem->Create(memoryDesc); - memcpy(mem->GetPtr(), data.data(), data.size() * sizeof(float)); + auto mem = std::make_shared(engine, memoryDesc); + // mem->Create(memoryDesc); + memcpy(mem->GetData(), data.data(), data.size() * sizeof(float)); args[DNNL_ARG_ATTR_MULTIPLE_POST_OP(ops.len() - 1) | DNNL_ARG_SRC_1] = mem; } diff --git a/src/plugins/intel_cpu/src/dnnl_scratch_pad.h b/src/plugins/intel_cpu/src/dnnl_scratch_pad.h index 390cf363f3e1e8..b6b007bc8b35ca 100644 --- 
a/src/plugins/intel_cpu/src/dnnl_scratch_pad.h +++ b/src/plugins/intel_cpu/src/dnnl_scratch_pad.h @@ -23,8 +23,8 @@ class DnnlScratchPad { } MemoryPtr createScratchPadMem(const MemoryDescPtr& md) { - auto mem = std::make_shared(eng); - mem->Create(md, mgrPtr); + auto mem = std::make_shared(eng, std::unique_ptr(mgrPtr.get()), md); + // mem->Create(md, mgrPtr); return mem; } }; diff --git a/src/plugins/intel_cpu/src/edge.cpp b/src/plugins/intel_cpu/src/edge.cpp index 97a86b36bdfa6f..cf6926fc28b4c6 100644 --- a/src/plugins/intel_cpu/src/edge.cpp +++ b/src/plugins/intel_cpu/src/edge.cpp @@ -252,7 +252,7 @@ int Edge::getOutputNum() const { return child_port; } -void Edge::allocateCommon(const std::function& allocate) { +void Edge::allocateCommon(const std::function& allocate) { if (status != Status::NeedAllocation) return; @@ -264,17 +264,15 @@ void Edge::allocateCommon(const std::functiongetEngine())); - allocate(memoryPtr, inputDesc); DEBUG_LOG(*this, " memoryPtr=", memoryPtr); status = Status::Allocated; } void Edge::allocate(const void* mem_ptr) { - auto allocateFunc = [=](const MemoryPtr& memoryPtr, const MemoryDesc& inputDesc) { - memoryPtr->Create(inputDesc, mem_ptr, false); // no pads zeroing + auto allocateFunc = [=](MemoryPtr memoryPtr, const MemoryDesc& inputDesc) { + auto parentPtr = getParent(); + memoryPtr.reset(new Memory(parentPtr->getEngine(), inputDesc, mem_ptr, false)); // no pads zeroing }; allocateCommon(allocateFunc); @@ -285,11 +283,29 @@ void Edge::allocate(MemoryMngrPtr memMngr) { IE_THROW(Unexpected) << "Memory manager ptr is NULL"; } - auto allocateFunc = [=](const MemoryPtr& memoryPtr, const MemoryDesc& inputDesc) { - memoryPtr->Create(inputDesc, memMngr); - }; + // auto allocateFunc = [=](MemoryPtr memoryPtr, const MemoryDesc& inputDesc) { + // auto parentPtr = getParent(); + // memoryPtr.reset(new Memory(parentPtr->getEngine(), std::unique_ptr(memMngr.get()), inputDesc)); + // }; - allocateCommon(allocateFunc); + // allocateCommon(allocateFunc); + + if (status != Status::NeedAllocation) + return; + + if (memoryPtr) + IE_THROW() << "Unexpected behaviour: status == NeedAllocation but memory is already allocated."; + + auto& inputDesc = getInputDesc(); + auto& outputDesc = getOutputDesc(); + if (!inputDesc.isCompatible(outputDesc)) + IE_THROW() << "Cannot allocate memory for incompatible descriptors."; + + auto parentPtr = getParent(); + memoryPtr.reset(new Memory(parentPtr->getEngine(), std::unique_ptr(memMngr.get()), inputDesc)); + + DEBUG_LOG(*this, " memoryPtr=", memoryPtr); + status = Status::Allocated; } std::string Edge::name() const { @@ -412,7 +428,7 @@ const MemoryDesc& Edge::getDesc() const { return getInputDesc(); } -const Memory &Edge::getMemory() { +const IMemory &Edge::getMemory() { return *getMemoryPtr(); } diff --git a/src/plugins/intel_cpu/src/edge.h b/src/plugins/intel_cpu/src/edge.h index 483f83f1a8cfbb..c00fd371c17990 100644 --- a/src/plugins/intel_cpu/src/edge.h +++ b/src/plugins/intel_cpu/src/edge.h @@ -63,7 +63,7 @@ class Edge { const std::shared_ptr getParent() const; const std::shared_ptr getChild() const; - const Memory& getMemory(); + const IMemory& getMemory(); MemoryPtr getMemoryPtr() const; void resetMemoryPtr(MemoryPtr mem); @@ -108,7 +108,7 @@ class Edge { void collectConsumers(std::vector>& result) const; EdgePtr getBaseEdge(int look = LOOK_BOTH); - void allocateCommon(const std::function& allocate); + void allocateCommon(const std::function& allocate); friend class Graph; }; diff --git a/src/plugins/intel_cpu/src/graph.cpp 
b/src/plugins/intel_cpu/src/graph.cpp index c17f5fb81d30ee..a2762768aab536 100644 --- a/src/plugins/intel_cpu/src/graph.cpp +++ b/src/plugins/intel_cpu/src/graph.cpp @@ -775,7 +775,7 @@ void Graph::AllocateWithReuse() { && edge->getParent()->isConstant()) { if (edge->getParent()->getType() == Type::Input) { auto constNode = std::static_pointer_cast(edge->getParent()); - edge->reuse(std::const_pointer_cast(constNode->getMemoryPtr())); + edge->reuse(std::const_pointer_cast(constNode->getMemoryPtr())); } else { edge->externalAllocate(context->getWeightsCache()); } @@ -845,8 +845,7 @@ void Graph::AllocateWithReuse() { MemorySolver staticMemSolver(definedBoxes); size_t total_size = static_cast(staticMemSolver.solve()) * alignment; - memWorkspace = std::make_shared(getEngine()); - memWorkspace->Create(DnnlBlockedMemoryDesc(InferenceEngine::Precision::I8, Shape(InferenceEngine::SizeVector{total_size}))); + memWorkspace = std::make_shared(getEngine(), DnnlBlockedMemoryDesc(InferenceEngine::Precision::I8, Shape(InferenceEngine::SizeVector{total_size}))); if (edge_clusters.empty()) return; @@ -1008,8 +1007,7 @@ void Graph::PushInputData(const std::string& name, const InferenceEngine::Blob:: if (ext_data_ptr != inter_data_ptr) { auto ext_tdesc = MemoryDescUtils::convertToDnnlBlockedMemoryDesc(in->getTensorDesc()); - Memory ext_mem(getEngine()); - ext_mem.Create(ext_tdesc, ext_data_ptr, false); + Memory ext_mem(getEngine(), ext_tdesc, ext_data_ptr, false); childEdge->getMemory().SetData(ext_mem, false); } @@ -1036,7 +1034,7 @@ void Graph::PullOutputData(BlobMap &out) { auto name = outputMap.first; auto node = outputMap.second; auto parentEdge = node->getParentEdgeAt(0); - const Memory& intr_blob = parentEdge->getMemory(); + const auto& intr_blob = parentEdge->getMemory(); const auto ext_blob_map = out.find(name); const auto ext_blob = ext_blob_map->second; @@ -1094,9 +1092,7 @@ void Graph::PullOutputData(BlobMap &out) { auto outBlobDesc = expectedDesc.getLayout() == InferenceEngine::Layout::ANY ? 
DnnlBlockedMemoryDesc(expectedDesc.getPrecision(), Shape(expectedDesc.getDims())) : MemoryDescUtils::convertToDnnlBlockedMemoryDesc(expectedDesc); - Memory outBloMem(getEngine()); - outBloMem.Create(outBlobDesc, ext_blob_ptr, false); - + Memory outBloMem(getEngine(), outBlobDesc, ext_blob_ptr, false); outBloMem.SetData(intr_blob, false); } else { size_t size_to_copy = intr_blob.GetDescWithType()->getPaddedElementsCount(); diff --git a/src/plugins/intel_cpu/src/graph_optimizer.cpp b/src/plugins/intel_cpu/src/graph_optimizer.cpp index 5456e69d887734..de20ebb62d4fa1 100644 --- a/src/plugins/intel_cpu/src/graph_optimizer.cpp +++ b/src/plugins/intel_cpu/src/graph_optimizer.cpp @@ -254,7 +254,7 @@ void GraphOptimizer::FuseConvMatmulFCDeconvAndDQScales(Graph &graph) { if (scalesBlob == nullptr) IE_THROW() << "Cannot cast to TBlob internal scales blob"; - auto scalesData = static_cast(scalesBlob->GetPtr()); + auto scalesData = static_cast(scalesBlob->GetData()); if (scalesData == nullptr) IE_THROW() << "scalesBlob has not allocated buffer"; auto scalesDims = getNormalizedDimsBySize(scales->getOutputShapeAtPort(0).getDims(), @@ -762,7 +762,7 @@ void GraphOptimizer::FuseConvolutionAndZeroPoints(Graph &graph) { if (zeroPointsBlob == nullptr) IE_THROW() << "Cannot cast to TBlob internal zero points blob"; - auto zeroPointsData = static_cast(zeroPointsBlob->GetPtr()); + auto zeroPointsData = static_cast(zeroPointsBlob->GetData()); if (zeroPointsData == nullptr) IE_THROW() << "zeroPointsBlob has not allocated buffer"; @@ -792,7 +792,7 @@ void GraphOptimizer::FuseConvolutionAndZeroPoints(Graph &graph) { if (weightsBlob == nullptr) IE_THROW() << "Cannot cast to TBlob internal weights blob"; - auto weightsPtr = static_cast(weightsBlob->GetPtr()); + auto weightsPtr = static_cast(weightsBlob->GetData()); if (weightsPtr == nullptr) IE_THROW() << "weightsBlob has not allocated buffer"; diff --git a/src/plugins/intel_cpu/src/infer_request.cpp b/src/plugins/intel_cpu/src/infer_request.cpp index a3dd6c78210c34..e1cae6347e1f32 100644 --- a/src/plugins/intel_cpu/src/infer_request.cpp +++ b/src/plugins/intel_cpu/src/infer_request.cpp @@ -105,7 +105,7 @@ void InferRequestBase::PushStates() { auto cur_state_mem = cur_node->getStore(); auto data_ptr = state->GetState()->cbuffer().as(); auto data_size = state->GetState()->byteSize(); - auto cur_state_mem_buf = static_cast(cur_state_mem->GetPtr()); + auto cur_state_mem_buf = static_cast(cur_state_mem->GetData()); cpu_memcpy(cur_state_mem_buf, data_ptr, data_size); } @@ -127,7 +127,7 @@ void InferRequestBase::PullStates() { auto cur_state_mem = cur_node->getStore(); auto data_ptr = state->GetState()->cbuffer().as(); auto data_size = state->GetState()->byteSize(); - auto cur_state_mem_buf = static_cast(cur_state_mem->GetPtr()); + auto cur_state_mem_buf = static_cast(cur_state_mem->GetData()); cpu_memcpy(data_ptr, cur_state_mem_buf, data_size); } diff --git a/src/plugins/intel_cpu/src/memory_desc/cpu_memory_desc_utils.cpp b/src/plugins/intel_cpu/src/memory_desc/cpu_memory_desc_utils.cpp index cc17e6acd83293..e02ce9e114c2db 100644 --- a/src/plugins/intel_cpu/src/memory_desc/cpu_memory_desc_utils.cpp +++ b/src/plugins/intel_cpu/src/memory_desc/cpu_memory_desc_utils.cpp @@ -89,7 +89,7 @@ BlockedMemoryDescPtr MemoryDescUtils::convertToBlockedMemoryDesc(const MemoryDes } } -InferenceEngine::Blob::Ptr MemoryDescUtils::interpretAsBlob(const Memory &mem) { +InferenceEngine::Blob::Ptr MemoryDescUtils::interpretAsBlob(const IMemory &mem) { // TODO [DS]: Rewrite when IE is moved to 
the new TensorDescriptor auto& memDesc = mem.getDesc(); InferenceEngine::TensorDesc desc = convertToTensorDesc(memDesc); @@ -98,7 +98,7 @@ InferenceEngine::Blob::Ptr MemoryDescUtils::interpretAsBlob(const Memory &mem) { return make_blob_with_precision(desc, mem.GetData()); } -InferenceEngine::TensorDesc MemoryDescUtils::interpretAsBlobDesc(const Memory &mem) { +InferenceEngine::TensorDesc MemoryDescUtils::interpretAsBlobDesc(const IMemory &mem) { auto& memDesc = mem.getDesc(); InferenceEngine::TensorDesc desc = convertToTensorDesc(memDesc); diff --git a/src/plugins/intel_cpu/src/memory_desc/cpu_memory_desc_utils.h b/src/plugins/intel_cpu/src/memory_desc/cpu_memory_desc_utils.h index 02d637d2010c8f..f30a34ecfd11ad 100644 --- a/src/plugins/intel_cpu/src/memory_desc/cpu_memory_desc_utils.h +++ b/src/plugins/intel_cpu/src/memory_desc/cpu_memory_desc_utils.h @@ -19,7 +19,7 @@ class DnnlMemoryDesc; class BlockedMemoryDesc; class DnnlBlockedMemoryDesc; class CpuBlockedMemoryDesc; -class Memory; +class IMemory; class MemoryDescUtils { public: @@ -65,14 +65,14 @@ class MemoryDescUtils { * @param desc Memory from which will be created InferenceEngine::Blob * @return pointer to InferenceEngine::Blob */ - static InferenceEngine::Blob::Ptr interpretAsBlob(const Memory& mem); + static InferenceEngine::Blob::Ptr interpretAsBlob(const IMemory& mem); /** * @brief Creates InferenceEngine::TensorDesc from Memory with the memory reuse * @param desc Memory from which will be created InferenceEngine::Blob * @return InferenceEngine::TensorDesc */ - static InferenceEngine::TensorDesc interpretAsBlobDesc(const Memory& mem); + static InferenceEngine::TensorDesc interpretAsBlobDesc(const IMemory& mem); /** * @brief Converts MemoryDesc to InferenceEngine::TensorDesc diff --git a/src/plugins/intel_cpu/src/node.cpp b/src/plugins/intel_cpu/src/node.cpp index 4912954f689fc3..65bb363a141c06 100644 --- a/src/plugins/intel_cpu/src/node.cpp +++ b/src/plugins/intel_cpu/src/node.cpp @@ -381,8 +381,7 @@ void Node::resolveInPlaceEdges(Edge::LOOK look) { auto baseMemMngr = getChildEdgesAtPort(inplaceOutIndx)[0]->getMemory().getMemoryMngr(); auto memMngr = std::make_shared(baseMemMngr); - auto newMem = std::make_shared(getEngine()); - newMem->Create(selected_pd->getConfig().inConfs[i].getMemDesc(), memMngr); + auto newMem = std::make_shared(getEngine(), std::unique_ptr(memMngr.get()), selected_pd->getConfig().inConfs[i].getMemDesc()); parentEdge->resetMemoryPtr(newMem); } } @@ -400,8 +399,7 @@ void Node::resolveInPlaceEdges(Edge::LOOK look) { for (auto& childEdge : childEdges) { IE_ASSERT(childEdge->getStatus() == Edge::Status::NotAllocated) << " Unexpected inplace resolve call to an allocated edge: " << childEdge->name(); - auto newMem = std::make_shared(getEngine()); - newMem->Create(selected_pd->getConfig().outConfs[i].getMemDesc(), memMngr); + auto newMem = std::make_shared(getEngine(), std::unique_ptr(memMngr.get()), selected_pd->getConfig().outConfs[i].getMemDesc()); childEdge->resetMemoryPtr(newMem); } } @@ -801,11 +799,9 @@ void Node::prepareMemory(const DnnlMemoryDescPtr& intDesc, size_t indx) { // TODO [DS]: internal blobs should be removed or rewritten using Memory object auto newDesc = MemoryDescUtils::convertToDnnlBlockedMemoryDesc(internalBlob->getTensorDesc()); - Memory memory{ engine }; - memory.Create(newDesc, internalBlob->buffer()); + Memory memory(engine, newDesc, internalBlob->buffer()); - MemoryPtr _ptr = std::make_shared(engine); - _ptr->Create(intDesc); + MemoryPtr _ptr = std::make_shared(engine, intDesc); 
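// The two statements around this point show the pattern this series uses in place of the
// earlier default-construct-then-Create() sequence: wrap the existing blob buffer in a
// temporary Memory without copying, allocate the destination through the new one-step
// constructor, then reorder between them. A minimal sketch with the extraction-stripped
// template arguments restored; the exact type parameters are assumptions:
//
//   Memory src(engine, srcDesc, existingBuffer);                // borrows the raw buffer
//   MemoryPtr dst = std::make_shared<Memory>(engine, dstDesc);  // allocates its own storage
//   node::Reorder::reorderData(src, *dst, context->getParamsCache());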
node::Reorder::reorderData(memory, *_ptr, context->getParamsCache()); return _ptr; }; @@ -863,11 +859,8 @@ MemoryPtr Node::prepareWeightMemory(DnnlMemoryDescPtr weightDesc) { auto create = [&] () { auto newSrcDesc = DnnlExtensionUtils::makeDescriptor(weightSrcDesc); - Memory srcMemory{ getEngine() }; - srcMemory.Create(newSrcDesc, edgeMem->GetData()); - - MemoryPtr _ptr = std::make_shared(getEngine()); - _ptr->Create(weightDesc); + Memory srcMemory{ getEngine(), newSrcDesc, edgeMem->GetData() }; + MemoryPtr _ptr = std::make_shared(getEngine(), weightDesc); node::Reorder::reorderData(srcMemory, *_ptr, context->getParamsCache()); return _ptr; @@ -1397,7 +1390,7 @@ std::pair, std::vector> Node::getScalesAndShifts(const auto constBlob = constInputNode->getMemoryPtr(); const auto elementsCount = constBlob->GetDescWithType()->getPaddedElementsCount(); buffer.resize(elementsCount); - cpu_convert(constBlob->GetPtr(), + cpu_convert(constBlob->GetData(), &buffer[0], DnnlExtensionUtils::DataTypeToIEPrecision(constBlob->GetDataType()), Precision::FP32, diff --git a/src/plugins/intel_cpu/src/nodes/adaptive_pooling.cpp b/src/plugins/intel_cpu/src/nodes/adaptive_pooling.cpp index 401fb32f67543c..2edfbb141110cb 100644 --- a/src/plugins/intel_cpu/src/nodes/adaptive_pooling.cpp +++ b/src/plugins/intel_cpu/src/nodes/adaptive_pooling.cpp @@ -43,7 +43,7 @@ class AdaptivePoolingShapeInfer : public ShapeInferEmptyPads { VectorDims outputDims(inputRank); outputDims[0] = inputDims[0]; outputDims[1] = inputDims[1]; - auto newSpatialDimsPtr = reinterpret_cast(data_dependency.at(1)->GetPtr()); + auto newSpatialDimsPtr = reinterpret_cast(data_dependency.at(1)->GetData()); for (size_t i = 0; i < spatialDimsSize; i++) { outputDims[i + 2] = newSpatialDimsPtr[i]; } @@ -139,7 +139,7 @@ void AdaptivePooling::getSupportedDescriptors() { } bool AdaptivePooling::needShapeInfer() const { - const auto newSpatialDimsPtr = reinterpret_cast(getParentEdgesAtPort(1)[0]->getMemoryPtr()->GetPtr()); + const auto newSpatialDimsPtr = reinterpret_cast(getParentEdgesAtPort(1)[0]->getMemoryPtr()->GetData()); for (int i = 0; i < spatialDimsCount; i++) { if (static_cast(spatialDimsValue[i]) != newSpatialDimsPtr[i]) { for (size_t j = 0; j < spatialDimsValue.size(); j++) { @@ -197,7 +197,7 @@ void AdaptivePooling::execute(dnnl::stream strm) { int *indexDst = nullptr; if (algorithm == Algorithm::AdaptivePoolingMax) { - indexDst = reinterpret_cast(getChildEdgeAt(1)->getMemoryPtr()->GetPtr()); + indexDst = reinterpret_cast(getChildEdgeAt(1)->getMemoryPtr()->GetData()); } auto isPlainFmt = srcMemory0.getDesc().hasLayoutType(LayoutType::ncsp); @@ -207,9 +207,9 @@ void AdaptivePooling::execute(dnnl::stream strm) { auto srcBlockDesc = srcMemory0.GetDescWithType(); int blockSize = isBlkFmt ? 
srcBlockDesc->getBlockDims().back() : 1; - const auto *src = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); - const auto *srcPooledSpatialShapes = reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->GetPtr()); - auto *dst = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); + const auto *src = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetData()); + const auto *srcPooledSpatialShapes = reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->GetData()); + auto *dst = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetData()); if (static_cast(srcMemory1.GetShape().getElementsCount()) != spatialDimsCount) IE_THROW() << errorPrefix << "has input spatial dimension (" << srcMemory1.GetShape().getElementsCount() diff --git a/src/plugins/intel_cpu/src/nodes/batch_to_space.cpp b/src/plugins/intel_cpu/src/nodes/batch_to_space.cpp index 804f79d507d70d..e859ff001da493 100644 --- a/src/plugins/intel_cpu/src/nodes/batch_to_space.cpp +++ b/src/plugins/intel_cpu/src/nodes/batch_to_space.cpp @@ -102,21 +102,21 @@ static std::vector getShape5D(const SizeVector &shape) { template void BatchToSpace::batchToSpaceKernel() { - const auto *srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); - const auto *blockShapesPtr = reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->GetPtr()); + const auto *srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetData()); + const auto *blockShapesPtr = reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->GetData()); size_t dataRank = getParentEdgesAtPort(0)[0]->getMemoryPtr()->GetShape().getRank(); blockShapeIn.clear(); for (size_t i = 0; i < dataRank; i++) { blockShapeIn.push_back(*(blockShapesPtr + i)); } - const auto *padsBeginPtr = reinterpret_cast(getParentEdgeAt(2)->getMemoryPtr()->GetPtr()); + const auto *padsBeginPtr = reinterpret_cast(getParentEdgeAt(2)->getMemoryPtr()->GetData()); cropsBeginIn.clear(); for (size_t i = 0; i < dataRank; i++) { cropsBeginIn.push_back(*(padsBeginPtr + i)); } - auto *dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); + auto *dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetData()); const auto &inDims = getParentEdgesAtPort(0)[0]->getMemory().getStaticDims(); const auto &outDims = getChildEdgesAtPort(0)[0]->getMemory().getStaticDims(); diff --git a/src/plugins/intel_cpu/src/nodes/bin_conv.cpp b/src/plugins/intel_cpu/src/nodes/bin_conv.cpp index 5965ed9ba3e78e..501fb71867b5fe 100644 --- a/src/plugins/intel_cpu/src/nodes/bin_conv.cpp +++ b/src/plugins/intel_cpu/src/nodes/bin_conv.cpp @@ -1302,9 +1302,9 @@ void BinaryConvolution::execute(dnnl::stream strm) { auto weightsMemory = getParentEdgeAt(1)->getMemoryPtr(); auto dstMemory = getChildEdgeAt(0)->getMemoryPtr(); - auto src = reinterpret_cast(srcMemory->GetPtr()); - auto weights = reinterpret_cast(weightsMemory->GetPtr()); - auto dst = reinterpret_cast(dstMemory->GetPtr()); + auto src = reinterpret_cast(srcMemory->GetData()); + auto weights = reinterpret_cast(weightsMemory->GetData()); + auto dst = reinterpret_cast(dstMemory->GetData()); auto srcDesc = getParentEdgeAt(0)->getMemory().GetDescWithType(); std::vector srcStride(srcDesc->getStrides().size()); diff --git a/src/plugins/intel_cpu/src/nodes/broadcast.cpp b/src/plugins/intel_cpu/src/nodes/broadcast.cpp index 7881bfa9c4befe..c8684430b09b8f 100644 --- a/src/plugins/intel_cpu/src/nodes/broadcast.cpp +++ b/src/plugins/intel_cpu/src/nodes/broadcast.cpp @@ -117,12 +117,12 @@ bool Broadcast::needPrepareParams() const { void 
Broadcast::prepareParams() { if (!constMap[TARGET_SHAPE_IDX]) { const auto& targetShapeMem = getParentEdgesAtPort(TARGET_SHAPE_IDX)[0]->getMemory(); - const int32_t* targetShapeData = reinterpret_cast(targetShapeMem.GetPtr()); + const int32_t* targetShapeData = reinterpret_cast(targetShapeMem.GetData()); targetShape.assign(targetShapeData, targetShapeData + targetShapeMem.getStaticDims()[0]); } if (broadcastType == EXPLICIT && !constMap[AXES_MAPPING_IDX]) { const auto& axesMapMem = getParentEdgesAtPort(AXES_MAPPING_IDX)[0]->getMemory(); - const int32_t* axesMapData = reinterpret_cast(axesMapMem.GetPtr()); + const int32_t* axesMapData = reinterpret_cast(axesMapMem.GetData()); axesMapping.assign(axesMapData, axesMapData + axesMapMem.getStaticDims()[0]); } @@ -162,7 +162,7 @@ bool Broadcast::needShapeInfer() const { if (targetShape.empty()) { return true; } - const int32_t* targetShapeData = reinterpret_cast(getParentEdgesAtPort(TARGET_SHAPE_IDX)[0]->getMemory().GetPtr()); + const int32_t* targetShapeData = reinterpret_cast(getParentEdgesAtPort(TARGET_SHAPE_IDX)[0]->getMemory().GetData()); for (size_t i = 0lu; i < targetShape.size(); i++) { if (targetShape[i] != targetShapeData[i]) { return true; @@ -173,7 +173,7 @@ bool Broadcast::needShapeInfer() const { if (axesMapping.empty()) { return true; } - const int32_t* axesMappingData = reinterpret_cast(getParentEdgesAtPort(AXES_MAPPING_IDX)[0]->getMemory().GetPtr()); + const int32_t* axesMappingData = reinterpret_cast(getParentEdgesAtPort(AXES_MAPPING_IDX)[0]->getMemory().GetData()); for (size_t i = 0lu; i < axesMapping.size(); i++) { if (axesMapping[i] != axesMappingData[i]) { return true; @@ -231,8 +231,8 @@ void Broadcast::plainExecute(dnnl::stream strm) { } const size_t workAmountDst = dstStrides[0] * dstDims[0]; - const auto *srcData = reinterpret_cast(getParentEdgeAt(INPUT_DATA_IDX)->getMemoryPtr()->GetPtr()); - auto *dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); + const auto *srcData = reinterpret_cast(getParentEdgeAt(INPUT_DATA_IDX)->getMemoryPtr()->GetData()); + auto *dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetData()); parallel_nt(0, [&](const int ithr, const int nthr) { size_t i = 0lu, srcIdx = 0lu, start = 0lu, end = 0lu; diff --git a/src/plugins/intel_cpu/src/nodes/bucketize.cpp b/src/plugins/intel_cpu/src/nodes/bucketize.cpp index 32627dbc77f131..f936e4c78f4184 100644 --- a/src/plugins/intel_cpu/src/nodes/bucketize.cpp +++ b/src/plugins/intel_cpu/src/nodes/bucketize.cpp @@ -213,9 +213,9 @@ bool Bucketize::isExecutable() const { template void Bucketize::bucketize() { - const auto *input_data = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); - const auto *boundaries_data = reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->GetPtr()); - auto *output_data = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); + const auto *input_data = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetData()); + const auto *boundaries_data = reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->GetData()); + auto *output_data = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetData()); if (!with_bins) { memset(output_data, 0, num_values * sizeof(T_IND)); diff --git a/src/plugins/intel_cpu/src/nodes/color_convert.cpp b/src/plugins/intel_cpu/src/nodes/color_convert.cpp index d4ae67bea38cba..0dae7af39145bf 100644 --- a/src/plugins/intel_cpu/src/nodes/color_convert.cpp +++ b/src/plugins/intel_cpu/src/nodes/color_convert.cpp @@ -1026,11 +1026,11 @@ 
InferenceEngine::Precision ColorConvert::Converter::outputPrecision(size_t idx) } const void * ColorConvert::Converter::input(size_t idx) const { - return _node->getParentEdgeAt(idx)->getMemoryPtr()->GetPtr(); + return _node->getParentEdgeAt(idx)->getMemoryPtr()->GetData(); } void * ColorConvert::Converter::output(size_t idx) const { - return _node->getChildEdgeAt(idx)->getMemoryPtr()->GetPtr(); + return _node->getChildEdgeAt(idx)->getMemoryPtr()->GetData(); } const VectorDims & ColorConvert::Converter::inputDims(size_t idx) const { diff --git a/src/plugins/intel_cpu/src/nodes/common/tile_broadcast_utils.cpp b/src/plugins/intel_cpu/src/nodes/common/tile_broadcast_utils.cpp index 0b3d5f524de896..b366f2b414bd74 100644 --- a/src/plugins/intel_cpu/src/nodes/common/tile_broadcast_utils.cpp +++ b/src/plugins/intel_cpu/src/nodes/common/tile_broadcast_utils.cpp @@ -247,8 +247,8 @@ void TileBroadcastCommon::broadcastScalar(const char *srcData, char *dstData, si } void TileBroadcastCommon::optimizedExecute(const MemoryPtr& srcMemory, const MemoryPtr& dstMemory) { - auto srcData = reinterpret_cast(srcMemory->GetPtr()); - auto dstData = reinterpret_cast(dstMemory->GetPtr()); + auto srcData = reinterpret_cast(srcMemory->GetData()); + auto dstData = reinterpret_cast(dstMemory->GetData()); if (srcMemory->getStaticDims() == dstMemory->getStaticDims()) { const auto prc = dstMemory->getDesc().getPrecision(); @@ -260,7 +260,7 @@ void TileBroadcastCommon::optimizedExecute(const MemoryPtr& srcMemory, const Mem if (optimizedParams.dstStrides[0] == optimizedParams.dims[5] * optimizedParams.dstStrides[5]) { size_t data_size = optimizedParams.dstStrides[5]; size_t elt_cnt = optimizedParams.dims[5]; - auto srcData_i32 = reinterpret_cast(srcMemory->GetPtr()); + auto srcData_i32 = reinterpret_cast(srcMemory->GetData()); if (data_size == 1) { memset(dstData, srcData[0], elt_cnt); } else if (data_size == 4 && srcData_i32[0] == 0) { diff --git a/src/plugins/intel_cpu/src/nodes/concat.cpp b/src/plugins/intel_cpu/src/nodes/concat.cpp index ad4bb56ba729d7..ea4fa815720a3c 100644 --- a/src/plugins/intel_cpu/src/nodes/concat.cpp +++ b/src/plugins/intel_cpu/src/nodes/concat.cpp @@ -461,7 +461,7 @@ void Concat::execute(dnnl::stream strm) { if (canExecRef) { execRef(); } else { - const Memory& dst_memory = getChildEdgeAt(0)->getMemory(); + const auto& dst_memory = getChildEdgeAt(0)->getMemory(); const size_t num_src = getParentEdges().size(); std::unordered_map mem_ags {{DNNL_ARG_DST, dst_memory.GetPrimitive()}}; size_t nonZeroInShapes = 0; @@ -482,7 +482,7 @@ InferenceEngine::Precision Concat::getRuntimePrecision() const { } void Concat::execNspcSpecCase() { - const Memory& dst_memory = getChildEdgeAt(0)->getMemory(); + const auto& dst_memory = getChildEdgeAt(0)->getMemory(); const size_t num_src = getParentEdges().size(); uint8_t* dst_ptr = reinterpret_cast(dst_memory.GetData()); const size_t dataSize = DnnlExtensionUtils::sizeOfDataType(dst_memory.GetDataType()); @@ -495,7 +495,7 @@ void Concat::execNspcSpecCase() { size_t nonZeroInShapes = 0; int firstNonZeroEdge = -1; for (size_t i = 0; i < num_src; i++) { - const Memory& src_mem = getParentEdgesAtPort(i)[0]->getMemory(); + const auto& src_mem = getParentEdgesAtPort(i)[0]->getMemory(); if (src_mem.GetShape().hasZeroDims()) { continue; } @@ -525,14 +525,14 @@ void Concat::execNspcSpecCase() { void Concat::execRef() { const size_t numSrc = getParentEdges().size(); - const Memory& dstMemory = getChildEdgeAt(0)->getMemory(); + const auto& dstMemory = 
getChildEdgeAt(0)->getMemory(); const size_t elemSize = DnnlExtensionUtils::sizeOfDataType(dstMemory.GetDataType()); const auto dstMemBlkDesc = dstMemory.getDescPtr()->as(); const auto& outputShape = dstMemBlkDesc->getBlockDims(); uint8_t* dstPtr = reinterpret_cast(dstMemory.GetData()); for (size_t i = 0; i < numSrc; i++) { - const Memory& srcMem = getParentEdgesAtPort(i)[0]->getMemory(); - srcPtrs[i] = reinterpret_cast(srcMem.GetPtr()); + const auto& srcMem = getParentEdgesAtPort(i)[0]->getMemory(); + srcPtrs[i] = reinterpret_cast(srcMem.GetData()); } size_t outputStrides[MAX_RANK_REF] = {0}; @@ -640,8 +640,7 @@ void Concat::resolveInPlaceEdges(Edge::LOOK look) { IE_ASSERT(parentEdge->getStatus() == Edge::Status::NotAllocated) << "Unexpected inplace resolve call to an allocated edge: " << parentEdge->name(); auto memMngr = std::make_shared(baseMemMngr, numberOfInputs, i); - auto newMem = std::make_shared(getEngine()); - newMem->Create(selected_pd->getConfig().inConfs[i].getMemDesc(), memMngr); + auto newMem = std::make_shared(getEngine(), std::unique_ptr(memMngr.get()), selected_pd->getConfig().inConfs[i].getMemDesc()); parentEdge->resetMemoryPtr(newMem); } diff --git a/src/plugins/intel_cpu/src/nodes/conv.cpp b/src/plugins/intel_cpu/src/nodes/conv.cpp index 31a7fa1656bf8d..1079981a452c15 100644 --- a/src/plugins/intel_cpu/src/nodes/conv.cpp +++ b/src/plugins/intel_cpu/src/nodes/conv.cpp @@ -907,9 +907,8 @@ void Convolution::addZeroPoints(dnnl::primitive_attr& attr) { attr.set_zero_points_mask(DNNL_ARG_SRC, 0); if (!stockInputZeroPointsMemPtr) { - stockInputZeroPointsMemPtr.reset(new Memory(getEngine())); DnnlBlockedMemoryDesc memoryDesc(Precision::I32, {inputZeroPoints.size()}); - stockInputZeroPointsMemPtr->Create(memoryDesc, inputZeroPoints.data()); + stockInputZeroPointsMemPtr.reset(new Memory(getEngine(), memoryDesc, inputZeroPoints.data())); } } @@ -918,9 +917,8 @@ void Convolution::addLegacyZeroPoints(dnnl::primitive_attr& attr) { DEBUG_LOG(getName(), ": Set legacy input zero points"); attr.set_input_zero_points(legacyInputZeroPoints.size(), 1 << 1 /*through C dim*/); if (!legacyInputZeroPointsMemPtr) { - legacyInputZeroPointsMemPtr.reset(new Memory(getEngine())); DnnlBlockedMemoryDesc memoryDesc(Precision::U8, {legacyInputZeroPoints.size()}); - legacyInputZeroPointsMemPtr->Create(memoryDesc, legacyInputZeroPoints.data()); + legacyInputZeroPointsMemPtr.reset(new Memory(getEngine(), memoryDesc, legacyInputZeroPoints.data())); } } @@ -929,9 +927,8 @@ void Convolution::addLegacyZeroPoints(dnnl::primitive_attr& attr) { attr.set_weights_zero_points(legacyWeightsZeroPoints.size(), 1 << 1 /*through C dim*/); if (!legacyWeightsZeroPointsMemPtr) { - legacyWeightsZeroPointsMemPtr.reset(new Memory(getEngine())); DnnlBlockedMemoryDesc memoryDesc(Precision::FP32, {legacyWeightsZeroPoints.size()}); - legacyWeightsZeroPointsMemPtr->Create(memoryDesc, legacyWeightsZeroPoints.data()); + legacyWeightsZeroPointsMemPtr.reset(new Memory(getEngine(), memoryDesc, legacyWeightsZeroPoints.data())); } } @@ -940,9 +937,8 @@ void Convolution::addLegacyZeroPoints(dnnl::primitive_attr& attr) { attr.set_output_compensations(legacyOutputCompensation.size(), 1 << 1 /*through C dim*/); if (!legacyOutputCompensationMemPtr) { - legacyOutputCompensationMemPtr.reset(new Memory(getEngine())); DnnlBlockedMemoryDesc memoryDesc(Precision::I32, {legacyOutputCompensation.size()}); - legacyOutputCompensationMemPtr->Create(memoryDesc, legacyOutputCompensation.data()); + legacyOutputCompensationMemPtr.reset(new 
Memory(getEngine(), memoryDesc, legacyOutputCompensation.data())); } } } @@ -1269,7 +1265,7 @@ InferenceEngine::Blob::Ptr Convolution::createInternalBlob(InferenceEngine::Size IE_THROW() << "Created internal blob and const blob has different size for node: " << getName() << "."; } - cpu_convert(blb->GetPtr(), + cpu_convert(blb->GetData(), internalBlob->buffer(), DnnlExtensionUtils::DataTypeToIEPrecision(blb->GetDataType()), internalBlob->getTensorDesc().getPrecision(), @@ -1529,7 +1525,7 @@ void Convolution::executeDynamicImpl(dnnl::stream strm) { const size_t sumPortNum = getParentEdges().size() - 1; const auto& sumInpMem = getParentEdgesAtPort(sumPortNum).front()->getMemory(); auto inp1 = subgraph->getInput(1); - inp1->getChildEdgesAtPort(0).front()->getMemoryPtr()->setDataHandle(sumInpMem.GetData()); + // inp1->getChildEdgesAtPort(0).front()->getMemoryPtr()->setDataHandle(sumInpMem.GetData()); subgraph->infer(); diff --git a/src/plugins/intel_cpu/src/nodes/convert.cpp b/src/plugins/intel_cpu/src/nodes/convert.cpp index 9015e2d805f10d..30624aeb65cd98 100644 --- a/src/plugins/intel_cpu/src/nodes/convert.cpp +++ b/src/plugins/intel_cpu/src/nodes/convert.cpp @@ -144,8 +144,8 @@ void Convert::execute(dnnl::stream strm) { if (parentPaddElemCount != childPaddElemCount) IE_THROW() << errorPrefix << " has different elements number in input and output buffers"; - void* srcPtr = parentMem.GetPtr(); - void* dstPtr = childMem.GetPtr(); + void* srcPtr = parentMem.GetData(); + void* dstPtr = childMem.GetData(); cpu_convert(srcPtr, dstPtr, diff --git a/src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder.cpp b/src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder.cpp index ae0a1187890873..011cc1ab56ee24 100644 --- a/src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder.cpp +++ b/src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder.cpp @@ -70,9 +70,9 @@ void CTCGreedyDecoder::initSupportedPrimitiveDescriptors() { } void CTCGreedyDecoder::execute(dnnl::stream strm) { - const float* probabilities = reinterpret_cast(getParentEdgeAt(DATA_INDEX)->getMemoryPtr()->GetPtr()); - const float* sequenceMask = reinterpret_cast(getParentEdgeAt(SEQUENCE_LENGTH_INDEX)->getMemoryPtr()->GetPtr()); - float* outputSequences = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); + const float* probabilities = reinterpret_cast(getParentEdgeAt(DATA_INDEX)->getMemoryPtr()->GetData()); + const float* sequenceMask = reinterpret_cast(getParentEdgeAt(SEQUENCE_LENGTH_INDEX)->getMemoryPtr()->GetData()); + float* outputSequences = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetData()); const size_t T = getParentEdgeAt(DATA_INDEX)->getMemory().getStaticDims()[0]; const size_t B = getParentEdgeAt(DATA_INDEX)->getMemory().getStaticDims()[1]; diff --git a/src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder_seq_len.cpp b/src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder_seq_len.cpp index 34262e85f7fd2d..764cff2dbf66f9 100644 --- a/src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder_seq_len.cpp +++ b/src/plugins/intel_cpu/src/nodes/ctc_greedy_decoder_seq_len.cpp @@ -75,10 +75,10 @@ void CTCGreedyDecoderSeqLen::initSupportedPrimitiveDescriptors() { } void CTCGreedyDecoderSeqLen::execute(dnnl::stream strm) { - const float* probabilities = reinterpret_cast(getParentEdgeAt(DATA_INDEX)->getMemoryPtr()->GetPtr()); - const int* sequenceLengths = reinterpret_cast(getParentEdgeAt(SEQUENCE_LENGTH_INDEX)->getMemoryPtr()->GetPtr()); - int* decodedClasses = 
reinterpret_cast(getChildEdgesAtPort(DECODED_CLASSES_INDEX)[0]->getMemoryPtr()->GetPtr()); - int* decodedClassesLength = reinterpret_cast(getChildEdgesAtPort(DECODED_CLASSES_LENGTH_INDEX)[0]->getMemoryPtr()->GetPtr()); + const float* probabilities = reinterpret_cast(getParentEdgeAt(DATA_INDEX)->getMemoryPtr()->GetData()); + const int* sequenceLengths = reinterpret_cast(getParentEdgeAt(SEQUENCE_LENGTH_INDEX)->getMemoryPtr()->GetData()); + int* decodedClasses = reinterpret_cast(getChildEdgesAtPort(DECODED_CLASSES_INDEX)[0]->getMemoryPtr()->GetData()); + int* decodedClassesLength = reinterpret_cast(getChildEdgesAtPort(DECODED_CLASSES_LENGTH_INDEX)[0]->getMemoryPtr()->GetData()); const size_t B = getParentEdgeAt(DATA_INDEX)->getMemory().getStaticDims()[0];; const size_t T = getParentEdgeAt(DATA_INDEX)->getMemory().getStaticDims()[1];; @@ -87,7 +87,7 @@ void CTCGreedyDecoderSeqLen::execute(dnnl::stream strm) { int blankIndex = C - 1; if (inputShapes.size() > BLANK_INDEX) - blankIndex = (reinterpret_cast(getParentEdgeAt(BLANK_INDEX)->getMemoryPtr()->GetPtr()))[0]; + blankIndex = (reinterpret_cast(getParentEdgeAt(BLANK_INDEX)->getMemoryPtr()->GetData()))[0]; size_t workAmount = 0; for (size_t b = 0; b < B; b++) { diff --git a/src/plugins/intel_cpu/src/nodes/ctc_loss.cpp b/src/plugins/intel_cpu/src/nodes/ctc_loss.cpp index 1acfd6f2bd7fc7..5df5f41fed8e27 100644 --- a/src/plugins/intel_cpu/src/nodes/ctc_loss.cpp +++ b/src/plugins/intel_cpu/src/nodes/ctc_loss.cpp @@ -67,11 +67,11 @@ void CTCLoss::executeDynamicImpl(dnnl::stream strm) { void CTCLoss::execute(dnnl::stream strm) { StatusCode returnCode = OK; - const float* logits = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); - const int* logitsLength = reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->GetPtr()); - const int* labels = reinterpret_cast(getParentEdgeAt(2)->getMemoryPtr()->GetPtr()); - const int* labelsLength = reinterpret_cast(getParentEdgeAt(3)->getMemoryPtr()->GetPtr()); - float* dstData = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); + const float* logits = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetData()); + const int* logitsLength = reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->GetData()); + const int* labels = reinterpret_cast(getParentEdgeAt(2)->getMemoryPtr()->GetData()); + const int* labelsLength = reinterpret_cast(getParentEdgeAt(3)->getMemoryPtr()->GetData()); + float* dstData = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetData()); const auto &inDims = getParentEdgeAt(0)->getMemory().getStaticDims(); const size_t batchNum = inDims[0]; @@ -80,7 +80,7 @@ void CTCLoss::execute(dnnl::stream strm) { int blankIndex = classesNum - 1; if (inputShapes.size() > 4) { - blankIndex = reinterpret_cast(getParentEdgeAt(4)->getMemoryPtr()->GetPtr())[0]; + blankIndex = reinterpret_cast(getParentEdgeAt(4)->getMemoryPtr()->GetData())[0]; } std::vector decodedTargetLenB(batchNum, 0); diff --git a/src/plugins/intel_cpu/src/nodes/cum_sum.cpp b/src/plugins/intel_cpu/src/nodes/cum_sum.cpp index 64d2af3a9429c1..d3cf38fbe64fa2 100644 --- a/src/plugins/intel_cpu/src/nodes/cum_sum.cpp +++ b/src/plugins/intel_cpu/src/nodes/cum_sum.cpp @@ -109,8 +109,8 @@ void CumSum::execute(dnnl::stream strm) { template void CumSum::exec() { - const auto *input = reinterpret_cast(getParentEdgeAt(CUM_SUM_DATA)->getMemoryPtr()->GetPtr()); - auto *output = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); + const auto *input = 
reinterpret_cast(getParentEdgeAt(CUM_SUM_DATA)->getMemoryPtr()->GetData()); + auto *output = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetData()); const VectorDims strides = getParentEdgeAt(CUM_SUM_DATA)->getMemory().GetDescWithType()->getStrides(); if (reverse) { @@ -226,18 +226,18 @@ inline size_t CumSum::getStartOffset(const std::vector &forStartOffset, return startOffset; } -size_t CumSum::getAxis(const Memory& _axis, const Memory& _data) const { +size_t CumSum::getAxis(const IMemory& _axis, const IMemory& _data) const { const auto& axisPrecision = _axis.getDesc().getPrecision(); const int64_t dataShapeSize = static_cast(_data.GetShape().getRank()); int64_t axisValueFromBlob = 0; switch (axisPrecision) { case Precision::I32 : { - const auto *axisPtr = reinterpret_cast(_axis.GetPtr()); + const auto *axisPtr = reinterpret_cast(_axis.GetData()); axisValueFromBlob = static_cast(axisPtr[0]); break; } case Precision::I64 : { - const auto *axisPtr = reinterpret_cast(_axis.GetPtr()); + const auto *axisPtr = reinterpret_cast(_axis.GetData()); axisValueFromBlob = axisPtr[0]; break; } diff --git a/src/plugins/intel_cpu/src/nodes/cum_sum.h b/src/plugins/intel_cpu/src/nodes/cum_sum.h index 44b1f7a8e6d1df..eee2da8c085472 100644 --- a/src/plugins/intel_cpu/src/nodes/cum_sum.h +++ b/src/plugins/intel_cpu/src/nodes/cum_sum.h @@ -38,7 +38,7 @@ class CumSum : public Node { inline size_t getStartOffset(const std::vector &forStartOffset, const std::vector& strides) const; - size_t getAxis(const Memory& _axis, const Memory& _data) const; + size_t getAxis(const IMemory& _axis, const IMemory& _data) const; enum { CUM_SUM_DATA, AXIS, numOfInputs }; bool exclusive; diff --git a/src/plugins/intel_cpu/src/nodes/deconv.cpp b/src/plugins/intel_cpu/src/nodes/deconv.cpp index f1ed2e450c1d4a..353aaa730c120f 100644 --- a/src/plugins/intel_cpu/src/nodes/deconv.cpp +++ b/src/plugins/intel_cpu/src/nodes/deconv.cpp @@ -269,7 +269,7 @@ InferenceEngine::Blob::Ptr Deconvolution::createWeiBlobAsIO(InferenceEngine::Siz if (intBuffSize < offset) { IE_THROW() << "Cannot create internal buffer. 
Buffer can be overrun."; } - cpu_memcpy_s(data, intBuffSize, blb->GetPtr(), blbSize); + cpu_memcpy_s(data, intBuffSize, blb->GetData(), blbSize); return internalBlob; } @@ -618,8 +618,7 @@ VectorDims Deconvolution::shapeInferInternal(const VectorDims &inDims, std::vect outSpDimsVecShape = {outSpDims.size()}; inputShapesRefs.push_back(std::cref(outSpDimsVecShape)); CpuBlockedMemoryDesc desc(Precision::I32, Shape(outSpDimsVecShape)); - auto mem = std::make_shared(getEngine()); - mem->Create(desc, outSpDims.data()); + auto mem = std::make_shared(getEngine(), desc, outSpDims.data()); inputValues[i] = mem; break; } @@ -1163,7 +1162,7 @@ std::vector Deconvolution::readOutputSpatialDims() const { if (shapeMemPtr->getStaticDims()[0] != spDimsNum) { IE_THROW() << "Can't read output spatial dims, beause 'output_shape' input has incorrect number of elements"; } - const int32_t *outShapePtr = reinterpret_cast(shapeMemPtr->GetPtr()); + const int32_t *outShapePtr = reinterpret_cast(shapeMemPtr->GetData()); std::vector outSpDims(outShapePtr, outShapePtr + shapeMemPtr->getStaticDims()[0]); return outSpDims; } diff --git a/src/plugins/intel_cpu/src/nodes/def_conv.cpp b/src/plugins/intel_cpu/src/nodes/def_conv.cpp index 3d8be09f196dbf..e8edc577d35d1a 100644 --- a/src/plugins/intel_cpu/src/nodes/def_conv.cpp +++ b/src/plugins/intel_cpu/src/nodes/def_conv.cpp @@ -1284,15 +1284,15 @@ void DeformableConvolution::execute(dnnl::stream strm) { auto &srcMemory2 = getParentEdgeAt(2)->getMemory(); auto &dstMemory = getChildEdgeAt(0)->getMemory(); - const auto *src = reinterpret_cast(srcMemory0.GetPtr()); - const auto *offsets = reinterpret_cast(srcMemory1.GetPtr()); - const auto *weights = reinterpret_cast(srcMemory2.GetPtr()); + const auto *src = reinterpret_cast(srcMemory0.GetData()); + const auto *offsets = reinterpret_cast(srcMemory1.GetData()); + const auto *weights = reinterpret_cast(srcMemory2.GetData()); float* modulation = nullptr; if (inputsNumber > 3) { - modulation = reinterpret_cast(getParentEdgeAt(3)->getMemory().GetPtr()); + modulation = reinterpret_cast(getParentEdgeAt(3)->getMemory().GetData()); } - float *dst = reinterpret_cast(dstMemory.GetPtr()); + float *dst = reinterpret_cast(dstMemory.GetData()); auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor(); if (!selectedPrimitiveDescriptor) diff --git a/src/plugins/intel_cpu/src/nodes/depth_to_space.cpp b/src/plugins/intel_cpu/src/nodes/depth_to_space.cpp index 4d18e8ae720f03..7a55c38393f99b 100644 --- a/src/plugins/intel_cpu/src/nodes/depth_to_space.cpp +++ b/src/plugins/intel_cpu/src/nodes/depth_to_space.cpp @@ -292,8 +292,8 @@ void DepthToSpace::DepthToSpaceExecutor::exec(const MemoryPtr& srcMemPtr, const if (!permuteKernel) IE_THROW() << "Could not execute. 
Kernel for Transpose node was not compiled."; - const uint8_t* srcData = reinterpret_cast(srcMemPtr->GetPtr()); - uint8_t* dstData = reinterpret_cast(dstMemPtr->GetPtr()); + const uint8_t* srcData = reinterpret_cast(srcMemPtr->GetData()); + uint8_t* dstData = reinterpret_cast(dstMemPtr->GetData()); permuteKernel->execute(srcData, dstData, MB); } diff --git a/src/plugins/intel_cpu/src/nodes/detection_output.cpp b/src/plugins/intel_cpu/src/nodes/detection_output.cpp index 82eae313fd168f..b760d67073de04 100644 --- a/src/plugins/intel_cpu/src/nodes/detection_output.cpp +++ b/src/plugins/intel_cpu/src/nodes/detection_output.cpp @@ -169,15 +169,15 @@ void DetectionOutput::executeDynamicImpl(dnnl::stream strm) { } void DetectionOutput::execute(dnnl::stream strm) { - float *dstData = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); + float *dstData = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetData()); - const float *locData = reinterpret_cast(getParentEdgeAt(ID_LOC)->getMemoryPtr()->GetPtr()); - const float *confData = reinterpret_cast(getParentEdgeAt(ID_CONF)->getMemoryPtr()->GetPtr()); - const float *priorData = reinterpret_cast(getParentEdgeAt(ID_PRIOR)->getMemoryPtr()->GetPtr()); + const float *locData = reinterpret_cast(getParentEdgeAt(ID_LOC)->getMemoryPtr()->GetData()); + const float *confData = reinterpret_cast(getParentEdgeAt(ID_CONF)->getMemoryPtr()->GetData()); + const float *priorData = reinterpret_cast(getParentEdgeAt(ID_PRIOR)->getMemoryPtr()->GetData()); const float *ARMConfData = inputShapes.size() > 3 ? - reinterpret_cast(getParentEdgeAt(ID_ARM_CONF)->getMemoryPtr()->GetPtr()) : nullptr; + reinterpret_cast(getParentEdgeAt(ID_ARM_CONF)->getMemoryPtr()->GetData()) : nullptr; const float *ARMLocData = inputShapes.size() > 4 ? 
- reinterpret_cast(getParentEdgeAt(ID_ARM_LOC)->getMemoryPtr()->GetPtr()) : nullptr; + reinterpret_cast(getParentEdgeAt(ID_ARM_LOC)->getMemoryPtr()->GetData()) : nullptr; float *reorderedConfData = reorderedConf.data(); int *reorderedConfDataIndices = reinterpret_cast(reorderedConf.data()); diff --git a/src/plugins/intel_cpu/src/nodes/dft.cpp b/src/plugins/intel_cpu/src/nodes/dft.cpp index 8501272d8224c2..b23aa473f78467 100644 --- a/src/plugins/intel_cpu/src/nodes/dft.cpp +++ b/src/plugins/intel_cpu/src/nodes/dft.cpp @@ -239,8 +239,8 @@ void DFT::execute(dnnl::stream strm) { const auto inputDataEdge = getParentEdgeAt(DATA_INDEX); const auto outputDataEdge = getChildEdgeAt(0); - const auto src = reinterpret_cast(inputDataEdge->getMemoryPtr()->GetPtr()); - auto dst = reinterpret_cast(outputDataEdge->getMemoryPtr()->GetPtr()); + const auto src = reinterpret_cast(inputDataEdge->getMemoryPtr()->GetData()); + auto dst = reinterpret_cast(outputDataEdge->getMemoryPtr()->GetData()); const auto inputRank = inputDataEdge->getMemory().GetShape().getRank(); @@ -542,7 +542,7 @@ void DFT::prepareParams() { std::vector DFT::getAxes() const { auto axesEdge = getParentEdgeAt(AXES_INDEX); - const auto* axesStartPtr = reinterpret_cast(axesEdge->getMemoryPtr()->GetPtr()); + const auto* axesStartPtr = reinterpret_cast(axesEdge->getMemoryPtr()->GetData()); auto axes = std::vector(axesStartPtr, axesStartPtr + axesEdge->getMemory().getStaticDims()[0]); for (auto& axis : axes) { if (axis < 0) { diff --git a/src/plugins/intel_cpu/src/nodes/eltwise.cpp b/src/plugins/intel_cpu/src/nodes/eltwise.cpp index c4ab140804eddb..898f1048ecb8c9 100644 --- a/src/plugins/intel_cpu/src/nodes/eltwise.cpp +++ b/src/plugins/intel_cpu/src/nodes/eltwise.cpp @@ -2417,10 +2417,8 @@ void Eltwise::fuseInto(NodePtr& parentNode) { void Eltwise::appendMemory(const std::vector &data, MemoryPtr &memPtr, std::vector& postOpsMem) { if (!memPtr) { - memPtr.reset(new Memory(getEngine())); DnnlBlockedMemoryDesc memoryDesc(Precision::FP32, {data.size()}); - memPtr->Create(memoryDesc, data.data()); - + memPtr.reset(new Memory(getEngine(), memoryDesc, data.data())); postOpsMem.push_back(memPtr); } } diff --git a/src/plugins/intel_cpu/src/nodes/embedding_bag_offset_sum.cpp b/src/plugins/intel_cpu/src/nodes/embedding_bag_offset_sum.cpp index 18f765ad195de0..1a9725a571d238 100644 --- a/src/plugins/intel_cpu/src/nodes/embedding_bag_offset_sum.cpp +++ b/src/plugins/intel_cpu/src/nodes/embedding_bag_offset_sum.cpp @@ -81,11 +81,11 @@ void EmbeddingBagOffsetSum::prepareParams() { } void EmbeddingBagOffsetSum::initFromInputs() { - indicesData_ = reinterpret_cast(getParentEdgeAt(INDICES_IDX)->getMemoryPtr()->GetPtr()); - offsetsData_ = reinterpret_cast(getParentEdgeAt(OFFSETS_IDX)->getMemoryPtr()->GetPtr()); + indicesData_ = reinterpret_cast(getParentEdgeAt(INDICES_IDX)->getMemoryPtr()->GetData()); + offsetsData_ = reinterpret_cast(getParentEdgeAt(OFFSETS_IDX)->getMemoryPtr()->GetData()); if (getParentEdges().size() > DEFAULT_INDEX_IDX) { - defaultIndices_ = reinterpret_cast(getParentEdgeAt(DEFAULT_INDEX_IDX)->getMemoryPtr()->GetPtr()); + defaultIndices_ = reinterpret_cast(getParentEdgeAt(DEFAULT_INDEX_IDX)->getMemoryPtr()->GetData()); } } @@ -131,10 +131,10 @@ bool EmbeddingBagOffsetSum::isExecutable() const { } void EmbeddingBagOffsetSum::execute(dnnl::stream strm) { - const auto *srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); + const auto *srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetData()); const uint8_t* 
weightsData = nullptr; if (_withWeights) - weightsData = reinterpret_cast(getParentEdgeAt(PER_SAMPLE_WEIGHTS_IDX)->getMemoryPtr()->GetPtr()); + weightsData = reinterpret_cast(getParentEdgeAt(PER_SAMPLE_WEIGHTS_IDX)->getMemoryPtr()->GetData()); const auto &inputMem = getParentEdgeAt(0)->getMemory(); EmbeddingBagSum::execute(srcData, weightsData, inputMem.getDesc().getPrecision(), diff --git a/src/plugins/intel_cpu/src/nodes/embedding_bag_packed_sum.cpp b/src/plugins/intel_cpu/src/nodes/embedding_bag_packed_sum.cpp index 8fde7972a18148..18c91e54f02910 100644 --- a/src/plugins/intel_cpu/src/nodes/embedding_bag_packed_sum.cpp +++ b/src/plugins/intel_cpu/src/nodes/embedding_bag_packed_sum.cpp @@ -75,7 +75,7 @@ void EmbeddingBagPackedSum::prepareParams() { } void EmbeddingBagPackedSum::initFromInputs() { - _indices = reinterpret_cast(getParentEdgeAt(INDICES_IDX)->getMemoryPtr()->GetPtr()); + _indices = reinterpret_cast(getParentEdgeAt(INDICES_IDX)->getMemoryPtr()->GetData()); } void EmbeddingBagPackedSum::getIndices(size_t embIndex, const int*& indices, size_t& size, int& weightsIdx, bool& withWeight) { @@ -99,10 +99,10 @@ bool EmbeddingBagPackedSum::isExecutable() const { } void EmbeddingBagPackedSum::execute(dnnl::stream strm) { - const auto *srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); + const auto *srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetData()); const uint8_t* weightsData = nullptr; if (_withWeights) - weightsData = reinterpret_cast(getParentEdgeAt(PER_SAMPLE_WEIGHTS_IDX)->getMemoryPtr()->GetPtr()); + weightsData = reinterpret_cast(getParentEdgeAt(PER_SAMPLE_WEIGHTS_IDX)->getMemoryPtr()->GetData()); const auto &inputMem = getParentEdgeAt(0)->getMemory(); EmbeddingBagSum::execute(srcData, weightsData, inputMem.getDesc().getPrecision(), diff --git a/src/plugins/intel_cpu/src/nodes/embedding_bag_sum.cpp b/src/plugins/intel_cpu/src/nodes/embedding_bag_sum.cpp index 13b19a45d1ad0a..beb7b1bb75ce6f 100644 --- a/src/plugins/intel_cpu/src/nodes/embedding_bag_sum.cpp +++ b/src/plugins/intel_cpu/src/nodes/embedding_bag_sum.cpp @@ -54,7 +54,7 @@ void EmbeddingBagSum::processData(const T* srcData, const T* weightsData, initFromInputs(); const size_t outputBagsNum = outMemory->GetShape().getStaticDims()[0]; - auto *dstData = reinterpret_cast(outMemory->GetPtr()); + auto *dstData = reinterpret_cast(outMemory->GetData()); auto threadBody = [&](const int ithr, const int nthr) { size_t start(0lu), end(0lu); diff --git a/src/plugins/intel_cpu/src/nodes/embedding_segments_sum.cpp b/src/plugins/intel_cpu/src/nodes/embedding_segments_sum.cpp index e0fee5ee8b2c4c..55fd32e24f3940 100644 --- a/src/plugins/intel_cpu/src/nodes/embedding_segments_sum.cpp +++ b/src/plugins/intel_cpu/src/nodes/embedding_segments_sum.cpp @@ -83,14 +83,14 @@ void EmbeddingSegmentsSum::prepareParams() { } void EmbeddingSegmentsSum::initFromInputs() { - indices_ = reinterpret_cast(getParentEdgeAt(INDICES_IDX)->getMemoryPtr()->GetPtr()); + indices_ = reinterpret_cast(getParentEdgeAt(INDICES_IDX)->getMemoryPtr()->GetData()); indicesSize_ = getParentEdgeAt(INDICES_IDX)->getMemory().GetShape().getElementsCount(); - segmentIds_ = reinterpret_cast(getParentEdgeAt(SEGMENT_ID_IDX)->getMemoryPtr()->GetPtr()); + segmentIds_ = reinterpret_cast(getParentEdgeAt(SEGMENT_ID_IDX)->getMemoryPtr()->GetData()); lastNumSegments_ = getNumSegments(); if (getParentEdges().size() > DEFAULT_INDEX_IDX) { - defaultIndices_ = reinterpret_cast(getParentEdgeAt(DEFAULT_INDEX_IDX)->getMemoryPtr()->GetPtr()); + 
defaultIndices_ = reinterpret_cast(getParentEdgeAt(DEFAULT_INDEX_IDX)->getMemoryPtr()->GetData()); } } @@ -123,7 +123,7 @@ void EmbeddingSegmentsSum::getIndices(size_t embIndex, const int*& indices, size } int32_t EmbeddingSegmentsSum::getNumSegments() const { - return reinterpret_cast(getParentEdgesAtPort(NUM_SEGMENTS_IDX)[0]->getMemory().GetPtr())[0]; + return reinterpret_cast(getParentEdgesAtPort(NUM_SEGMENTS_IDX)[0]->getMemory().GetData())[0]; } bool EmbeddingSegmentsSum::needShapeInfer() const { @@ -147,10 +147,10 @@ bool EmbeddingSegmentsSum::isExecutable() const { } void EmbeddingSegmentsSum::execute(dnnl::stream strm) { - const auto *srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); + const auto *srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetData()); const uint8_t* weightsData = nullptr; if (_withWeights) - weightsData = reinterpret_cast(getParentEdgeAt(PER_SAMPLE_WEIGHTS_IDX)->getMemoryPtr()->GetPtr()); + weightsData = reinterpret_cast(getParentEdgeAt(PER_SAMPLE_WEIGHTS_IDX)->getMemoryPtr()->GetData()); const auto &inputMem = getParentEdgeAt(0)->getMemory(); EmbeddingBagSum::execute(srcData, weightsData, inputMem.getDesc().getPrecision(), diff --git a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_eltwise.cpp b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_eltwise.cpp index 73da4fbcc4538f..d42aa160f55bd9 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_eltwise.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_eltwise.cpp @@ -475,10 +475,10 @@ bool AclEltwiseExecutor::init(const EltwiseAttrs &eltwiseAttrs, const std::vecto void AclEltwiseExecutor::exec(const std::vector &src, const std::vector &dst, const void *post_ops_data_) { for (size_t i = 0; i < src.size(); i++) { - srcTensors[i].allocator()->import_memory(src[i]->GetPtr()); + srcTensors[i].allocator()->import_memory(src[i]->GetData()); } for (size_t i = 0; i < dst.size(); i++) { - dstTensors[i].allocator()->import_memory(dst[i]->GetPtr()); + dstTensors[i].allocator()->import_memory(dst[i]->GetData()); } exec_func(); diff --git a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_interpolate.cpp b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_interpolate.cpp index 400c06981c7867..5dd51e9bf7c7dc 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_interpolate.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_interpolate.cpp @@ -93,7 +93,7 @@ bool ov::intel_cpu::ACLInterpolateExecutor::init(const InterpolateAttrs &interpo void ov::intel_cpu::ACLInterpolateExecutor::exec(const std::vector& src, const std::vector& dst, const void *post_ops_data_) { auto in_ptr_ = padPreprocess(src, dst); srcTensor.allocator()->import_memory(const_cast(reinterpret_cast(in_ptr_))); - dstTensor.allocator()->import_memory(dst[0]->GetPtr()); + dstTensor.allocator()->import_memory(dst[0]->GetData()); acl_scale->run(); diff --git a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_mvn.cpp b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_mvn.cpp index 84ae23e31e98c8..3890b6111f2bc2 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_mvn.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_mvn.cpp @@ -63,8 +63,8 @@ bool AclMVNExecutor::init(const MVNAttrs& mvnAttrs, } void AclMVNExecutor::exec(const std::vector& src, const std::vector& dst, const void *post_ops_data_) { - srcTensor.allocator()->import_memory(src[0]->GetPtr()); - dstTensor.allocator()->import_memory(dst[0]->GetPtr()); + 
srcTensor.allocator()->import_memory(src[0]->GetData()); + dstTensor.allocator()->import_memory(dst[0]->GetData()); mvn->run(); diff --git a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_pooling.cpp b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_pooling.cpp index 4e152e3987abfc..f1e15768361246 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_pooling.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_pooling.cpp @@ -173,9 +173,9 @@ bool AclPoolingExecutor::init(const PoolingAttrs& poolingAttrs, } void AclPoolingExecutor::exec(const std::vector& src, const std::vector& dst, std::unordered_map postOpsArgs) { - srcTensor.allocator()->import_memory(src[0]->GetPtr()); - dstTensor.allocator()->import_memory(dst[0]->GetPtr()); - if (dst.size() > 1u) indTensor.allocator()->import_memory(dst[1]->GetPtr()); + srcTensor.allocator()->import_memory(src[0]->GetData()); + dstTensor.allocator()->import_memory(dst[0]->GetData()); + if (dst.size() > 1u) indTensor.allocator()->import_memory(dst[1]->GetData()); exec_func(); diff --git a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_reduce.cpp b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_reduce.cpp index b0fa90831bb7f9..74e3f521ffc8c0 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_reduce.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_reduce.cpp @@ -92,8 +92,8 @@ bool AclReduceExecutor::init(const ReduceAttrs& reduceAttrs, } void AclReduceExecutor::exec(const std::vector& src, const std::vector& dst, const void *post_ops_data_) { - srcTensor.allocator()->import_memory(src[0]->GetPtr()); - dstTensor.allocator()->import_memory(dst[0]->GetPtr()); + srcTensor.allocator()->import_memory(src[0]->GetData()); + dstTensor.allocator()->import_memory(dst[0]->GetData()); exec_func(); diff --git a/src/plugins/intel_cpu/src/nodes/experimental_detectron_detection_output.cpp b/src/plugins/intel_cpu/src/nodes/experimental_detectron_detection_output.cpp index 8f420de0deef61..884320861f5752 100644 --- a/src/plugins/intel_cpu/src/nodes/experimental_detectron_detection_output.cpp +++ b/src/plugins/intel_cpu/src/nodes/experimental_detectron_detection_output.cpp @@ -277,14 +277,14 @@ void ExperimentalDetectronDetectionOutput::execute(dnnl::stream strm) { assert(classes_num_ == static_cast(getParentEdgeAt(INPUT_SCORES)->getMemory().getStaticDims()[1])); assert(4 * classes_num_ == static_cast(getParentEdgeAt(INPUT_DELTAS)->getMemory().getStaticDims()[1])); - const auto* boxes = reinterpret_cast(getParentEdgeAt(INPUT_ROIS)->getMemoryPtr()->GetPtr()); - const auto* deltas = reinterpret_cast(getParentEdgeAt(INPUT_DELTAS)->getMemoryPtr()->GetPtr()); - const auto* scores = reinterpret_cast(getParentEdgeAt(INPUT_SCORES)->getMemoryPtr()->GetPtr()); - const auto* im_info = reinterpret_cast(getParentEdgeAt(INPUT_IM_INFO)->getMemoryPtr()->GetPtr()); - - auto* output_boxes = reinterpret_cast(getChildEdgesAtPort(OUTPUT_BOXES)[0]->getMemoryPtr()->GetPtr()); - auto* output_scores = reinterpret_cast(getChildEdgesAtPort(OUTPUT_SCORES)[0]->getMemoryPtr()->GetPtr()); - auto* output_classes = reinterpret_cast(getChildEdgesAtPort(OUTPUT_CLASSES)[0]->getMemoryPtr()->GetPtr()); + const auto* boxes = reinterpret_cast(getParentEdgeAt(INPUT_ROIS)->getMemoryPtr()->GetData()); + const auto* deltas = reinterpret_cast(getParentEdgeAt(INPUT_DELTAS)->getMemoryPtr()->GetData()); + const auto* scores = reinterpret_cast(getParentEdgeAt(INPUT_SCORES)->getMemoryPtr()->GetData()); + const auto* im_info = 
reinterpret_cast(getParentEdgeAt(INPUT_IM_INFO)->getMemoryPtr()->GetData()); + + auto* output_boxes = reinterpret_cast(getChildEdgesAtPort(OUTPUT_BOXES)[0]->getMemoryPtr()->GetData()); + auto* output_scores = reinterpret_cast(getChildEdgesAtPort(OUTPUT_SCORES)[0]->getMemoryPtr()->GetData()); + auto* output_classes = reinterpret_cast(getChildEdgesAtPort(OUTPUT_CLASSES)[0]->getMemoryPtr()->GetData()); const float img_H = im_info[0]; const float img_W = im_info[1]; diff --git a/src/plugins/intel_cpu/src/nodes/experimental_detectron_generate_proposals_single_image.cpp b/src/plugins/intel_cpu/src/nodes/experimental_detectron_generate_proposals_single_image.cpp index bad80a534ba2c8..6c7aa2a9ee6887 100644 --- a/src/plugins/intel_cpu/src/nodes/experimental_detectron_generate_proposals_single_image.cpp +++ b/src/plugins/intel_cpu/src/nodes/experimental_detectron_generate_proposals_single_image.cpp @@ -349,13 +349,13 @@ void ExperimentalDetectronGenerateProposalsSingleImage::execute(dnnl::stream str IE_THROW() << "'Deltas' blob size for ONNXProposal is incompatible with 'scores' blob size!"; // Prepare memory - const float *p_deltas_item = reinterpret_cast(getParentEdgeAt(INPUT_DELTAS)->getMemoryPtr()->GetPtr()); - const float *p_scores_item = reinterpret_cast(getParentEdgeAt(INPUT_SCORES)->getMemoryPtr()->GetPtr()); - const float *p_anchors_item = reinterpret_cast(getParentEdgeAt(INPUT_ANCHORS)->getMemoryPtr()->GetPtr()); - const float *p_img_info_cpu = reinterpret_cast(getParentEdgeAt(INPUT_IM_INFO)->getMemoryPtr()->GetPtr()); + const float *p_deltas_item = reinterpret_cast(getParentEdgeAt(INPUT_DELTAS)->getMemoryPtr()->GetData()); + const float *p_scores_item = reinterpret_cast(getParentEdgeAt(INPUT_SCORES)->getMemoryPtr()->GetData()); + const float *p_anchors_item = reinterpret_cast(getParentEdgeAt(INPUT_ANCHORS)->getMemoryPtr()->GetData()); + const float *p_img_info_cpu = reinterpret_cast(getParentEdgeAt(INPUT_IM_INFO)->getMemoryPtr()->GetData()); - float *p_roi_item = reinterpret_cast(getChildEdgesAtPort(OUTPUT_ROIS)[0]->getMemoryPtr()->GetPtr()); - float *p_roi_score_item = reinterpret_cast(getChildEdgesAtPort(OUTPUT_SCORES)[0]->getMemoryPtr()->GetPtr()); + float *p_roi_item = reinterpret_cast(getChildEdgesAtPort(OUTPUT_ROIS)[0]->getMemoryPtr()->GetData()); + float *p_roi_score_item = reinterpret_cast(getChildEdgesAtPort(OUTPUT_SCORES)[0]->getMemoryPtr()->GetData()); const int anchors_num = scoreDims[0]; diff --git a/src/plugins/intel_cpu/src/nodes/experimental_detectron_priorgridgenerator.cpp b/src/plugins/intel_cpu/src/nodes/experimental_detectron_priorgridgenerator.cpp index a0a3ecde0a92c8..f80952e47710fa 100644 --- a/src/plugins/intel_cpu/src/nodes/experimental_detectron_priorgridgenerator.cpp +++ b/src/plugins/intel_cpu/src/nodes/experimental_detectron_priorgridgenerator.cpp @@ -70,8 +70,8 @@ void ExperimentalDetectronPriorGridGenerator::execute(dnnl::stream strm) { const float step_w = stride_w_ ? stride_w_ : static_cast(getParentEdgeAt(INPUT_IMAGE)->getMemory().getStaticDims()[3]) / layer_width; const float step_h = stride_h_ ? 
stride_h_ : static_cast(getParentEdgeAt(INPUT_IMAGE)->getMemory().getStaticDims()[2]) / layer_height; - const auto *bottom_data_0 = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); - auto *top_data_0 = reinterpret_cast(getChildEdgesAtPort(OUTPUT_ROIS)[0]->getMemoryPtr()->GetPtr()); + const auto *bottom_data_0 = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetData()); + auto *top_data_0 = reinterpret_cast(getChildEdgesAtPort(OUTPUT_ROIS)[0]->getMemoryPtr()->GetData()); for (int h = 0; h < layer_height; ++h) { for (int w = 0; w < layer_width; ++w) { diff --git a/src/plugins/intel_cpu/src/nodes/experimental_detectron_roifeatureextractor.cpp b/src/plugins/intel_cpu/src/nodes/experimental_detectron_roifeatureextractor.cpp index 8f6d67d30f3b93..5345ad61042611 100644 --- a/src/plugins/intel_cpu/src/nodes/experimental_detectron_roifeatureextractor.cpp +++ b/src/plugins/intel_cpu/src/nodes/experimental_detectron_roifeatureextractor.cpp @@ -335,11 +335,11 @@ void ExperimentalDetectronROIFeatureExtractor::execute(dnnl::stream strm) { const int channels_num = getParentEdgeAt(INPUT_FEATURES_START)->getMemory().getStaticDims()[1]; const int feaxels_per_roi = pooled_height_ * pooled_width_ * channels_num; - auto *input_rois = reinterpret_cast(getParentEdgeAt(INPUT_ROIS)->getMemoryPtr()->GetPtr()); - auto *output_rois_features = reinterpret_cast(getChildEdgesAtPort(OUTPUT_ROI_FEATURES)[0]->getMemoryPtr()->GetPtr()); + auto *input_rois = reinterpret_cast(getParentEdgeAt(INPUT_ROIS)->getMemoryPtr()->GetData()); + auto *output_rois_features = reinterpret_cast(getChildEdgesAtPort(OUTPUT_ROI_FEATURES)[0]->getMemoryPtr()->GetData()); float *output_rois = nullptr; if (OUTPUT_ROIS < outputShapes.size()) { - output_rois = reinterpret_cast(getChildEdgesAtPort(OUTPUT_ROIS)[0]->getMemoryPtr()->GetPtr()); + output_rois = reinterpret_cast(getChildEdgesAtPort(OUTPUT_ROIS)[0]->getMemoryPtr()->GetData()); } std::vector level_ids(num_rois, 0); @@ -357,7 +357,7 @@ void ExperimentalDetectronROIFeatureExtractor::execute(dnnl::stream strm) { const int level_rois_offset = rois_per_level[i]; const int level_rois_num = rois_per_level[i + 1] - level_rois_offset; if (level_rois_num > 0) { - auto *featuremap = reinterpret_cast(getParentEdgeAt(INPUT_FEATURES_START + i)->getMemoryPtr()->GetPtr()); + auto *featuremap = reinterpret_cast(getParentEdgeAt(INPUT_FEATURES_START + i)->getMemoryPtr()->GetData()); const int featuremap_height = getParentEdgeAt(INPUT_FEATURES_START + i)->getMemory().getStaticDims()[2]; const int featuremap_width = getParentEdgeAt(INPUT_FEATURES_START + i)->getMemory().getStaticDims()[3]; ROIAlignForward_cpu_kernel(feaxels_per_roi * level_rois_num, diff --git a/src/plugins/intel_cpu/src/nodes/experimental_detectron_topkrois.cpp b/src/plugins/intel_cpu/src/nodes/experimental_detectron_topkrois.cpp index 45cb3ad579119e..13216303dfb250 100644 --- a/src/plugins/intel_cpu/src/nodes/experimental_detectron_topkrois.cpp +++ b/src/plugins/intel_cpu/src/nodes/experimental_detectron_topkrois.cpp @@ -67,9 +67,9 @@ void ExperimentalDetectronTopKROIs::execute(dnnl::stream strm) { const int input_rois_num = getParentEdgeAt(INPUT_ROIS)->getMemory().getStaticDims()[0]; const int top_rois_num = (std::min)(max_rois_num_, input_rois_num); - auto *input_rois = reinterpret_cast(getParentEdgeAt(INPUT_ROIS)->getMemoryPtr()->GetPtr()); - auto *input_probs = reinterpret_cast(getParentEdgeAt(INPUT_PROBS)->getMemoryPtr()->GetPtr()); - auto *output_rois = 
reinterpret_cast(getChildEdgesAtPort(OUTPUT_ROIS)[0]->getMemoryPtr()->GetPtr()); + auto *input_rois = reinterpret_cast(getParentEdgeAt(INPUT_ROIS)->getMemoryPtr()->GetData()); + auto *input_probs = reinterpret_cast(getParentEdgeAt(INPUT_PROBS)->getMemoryPtr()->GetData()); + auto *output_rois = reinterpret_cast(getChildEdgesAtPort(OUTPUT_ROIS)[0]->getMemoryPtr()->GetData()); std::vector idx(input_rois_num); iota(idx.begin(), idx.end(), 0); diff --git a/src/plugins/intel_cpu/src/nodes/extract_image_patches.cpp b/src/plugins/intel_cpu/src/nodes/extract_image_patches.cpp index 6550d55b4d85a3..caa0c652e383e3 100644 --- a/src/plugins/intel_cpu/src/nodes/extract_image_patches.cpp +++ b/src/plugins/intel_cpu/src/nodes/extract_image_patches.cpp @@ -419,8 +419,8 @@ void ExtractImagePatches::initSupportedPrimitiveDescriptors() { void ExtractImagePatches::execute(dnnl::stream strm) { if (execPtr) { - auto src = getParentEdgeAt(0)->getMemoryPtr()->GetPtr(); - auto dst = getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr(); + auto src = getParentEdgeAt(0)->getMemoryPtr()->GetData(); + auto dst = getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetData(); const auto inStrides = getParentEdgeAt(0)->getMemory().GetDescWithType()->getStrides(); const auto outStrides = getChildEdgesAtPort(0)[0]->getMemory().GetDescWithType()->getStrides(); execPtr->exec(src, dst, inStrides, outStrides); diff --git a/src/plugins/intel_cpu/src/nodes/eye.cpp b/src/plugins/intel_cpu/src/nodes/eye.cpp index 01c08fd164df55..115826f23c17f1 100644 --- a/src/plugins/intel_cpu/src/nodes/eye.cpp +++ b/src/plugins/intel_cpu/src/nodes/eye.cpp @@ -110,7 +110,7 @@ void Eye::executeSpecified() { auto outPtr = getChildEdgeAt(0)->getMemoryPtr(); if (!outPtr || !outPtr ->isAllocated()) THROW_ERROR << errorPrefix << "Destination memory didn't allocate."; - T *dst = reinterpret_cast(outPtr->GetPtr()); + T *dst = reinterpret_cast(outPtr->GetData()); const size_t batchVolume = getBatchVolume(getBatchShape()); const size_t spatialCount = colNum * rowNum; diff --git a/src/plugins/intel_cpu/src/nodes/eye.h b/src/plugins/intel_cpu/src/nodes/eye.h index 0bf22bf8a1a03d..dbc0851b75886f 100644 --- a/src/plugins/intel_cpu/src/nodes/eye.h +++ b/src/plugins/intel_cpu/src/nodes/eye.h @@ -45,7 +45,7 @@ class Eye : public Node { auto rowMem = getParentEdgeAt(ROWS_NUM)->getMemoryPtr(); if (rowMem == nullptr) IE_THROW() << errorPrefix << " doesn't contain row_count data"; - const int *rowPtr = reinterpret_cast(rowMem->GetPtr()); + const int *rowPtr = reinterpret_cast(rowMem->GetData()); return rowPtr[0]; } @@ -53,7 +53,7 @@ class Eye : public Node { auto colMem = getParentEdgeAt(COLS_NUM)->getMemoryPtr(); if (colMem == nullptr) IE_THROW() << errorPrefix << " doesn't contain col_count data"; - const int *colPtr = reinterpret_cast(colMem->GetPtr()); + const int *colPtr = reinterpret_cast(colMem->GetData()); return colPtr[0]; } @@ -61,7 +61,7 @@ class Eye : public Node { auto diagIndMem = getParentEdgeAt(DIAGONAL_INDEX)->getMemoryPtr(); if (diagIndMem == nullptr) IE_THROW() << errorPrefix << " doesn't contain diag_index data"; - const int *diagIndexPtr = reinterpret_cast(diagIndMem->GetPtr()); + const int *diagIndexPtr = reinterpret_cast(diagIndMem->GetData()); return diagIndexPtr[0]; } @@ -69,7 +69,7 @@ class Eye : public Node { if (withBatchShape) { const int batchShapeSize = static_cast(getParentEdgeAt(BATCH_SHAPE)->getMemoryPtr()->GetShape().getElementsCount()); std::vector batchShape(batchShapeSize); - const int *batchShapePtr = 
reinterpret_cast(getParentEdgeAt(BATCH_SHAPE)->getMemoryPtr()->GetPtr()); + const int *batchShapePtr = reinterpret_cast(getParentEdgeAt(BATCH_SHAPE)->getMemoryPtr()->GetData()); batchShape.assign(batchShapePtr, batchShapePtr + batchShapeSize); return batchShape; } else { diff --git a/src/plugins/intel_cpu/src/nodes/fake_quantize.cpp b/src/plugins/intel_cpu/src/nodes/fake_quantize.cpp index 7e507174efce4f..b4b16e692f6bb8 100644 --- a/src/plugins/intel_cpu/src/nodes/fake_quantize.cpp +++ b/src/plugins/intel_cpu/src/nodes/fake_quantize.cpp @@ -1415,8 +1415,7 @@ void FakeQuantize::prepareParams() { } if (internalBlobMemory.empty() || needUpdThr) { - auto binarizationThresholdsDataMem = std::make_shared(getEngine()); - binarizationThresholdsDataMem->Create(weightsDataDesc, getBinarizationTresholdsPtr()); + auto binarizationThresholdsDataMem = std::make_shared(getEngine(), weightsDataDesc, getBinarizationTresholdsPtr()); if (internalBlobMemory.empty()) { internalBlobMemory.push_back(binarizationThresholdsDataMem); } else { @@ -1425,8 +1424,7 @@ void FakeQuantize::prepareParams() { } if (internalBlobMemory.size() == (numBinFqIntBlob - 1) || needUpdMask) { - auto binarizationMaskDataMem = std::make_shared(getEngine()); - binarizationMaskDataMem->Create(weightsDataDesc, getBinarizationOutputMaskPtr()); + auto binarizationMaskDataMem = std::make_shared(getEngine(), weightsDataDesc, getBinarizationOutputMaskPtr()); if (internalBlobMemory.size() == (numBinFqIntBlob - 1)) { internalBlobMemory.push_back(binarizationMaskDataMem); } else { @@ -1489,7 +1487,7 @@ void FakeQuantize::executeReference() { auto srcMemory = getParentEdgeAt(0)->getMemoryPtr(); auto dstMemory = getChildEdgeAt(0)->getMemoryPtr(); - auto src = reinterpret_cast(srcMemory->GetPtr()); + auto src = reinterpret_cast(srcMemory->GetData()); auto srcDims = srcMemory->getStaticDims(); auto dstDims = dstMemory->getStaticDims(); @@ -1516,7 +1514,7 @@ void FakeQuantize::executeReference() { } d_str[1] = tmp; - auto dst = reinterpret_cast(dstMemory->GetPtr()); + auto dst = reinterpret_cast(dstMemory->GetData()); const int nbits = 8; const int CB = impl::utils::div_up(C, nbits); @@ -1552,7 +1550,7 @@ void FakeQuantize::executeReference() { dst[dst_off / nbits] = bin_val; }); } else { - auto dst = reinterpret_cast(dstMemory->GetPtr()); + auto dst = reinterpret_cast(dstMemory->GetData()); parallel_nd(N, C, D, H, W, [&](dim_t n, dim_t c, dim_t d, dim_t h, dim_t w) { size_t src_off = srcDims.size() == 5 ? 
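The FakeQuantize::prepareParams hunks above apply the two refactoring patterns that run through this whole patch: the accessor rename (GetPtr() becomes GetData()) and the replacement of the two-step Memory initialization with a single constructor call that takes the descriptor and, optionally, an external data pointer. The sketch below only illustrates the resulting construction pattern; the helper name and its parameters are illustrative, not taken from the sources, and it assumes the Memory(engine, desc, data) constructor that the '+' lines above rely on.

    // Minimal sketch of the consolidated construction used by the '+' lines above.
    // 'engine' must be a valid dnnl::engine and 'data', when non-null, must outlive
    // the created Memory object, exactly as at the call sites being modified.
    static MemoryPtr makeStaticFp32Memory(const dnnl::engine& engine,
                                          const VectorDims& dims,
                                          const void* data) {
        DnnlBlockedMemoryDesc desc(InferenceEngine::Precision::FP32, Shape(dims));
        // A single call replaces the removed create-then-Create(desc, data) sequence.
        return std::make_shared<Memory>(engine, desc, data);
    }
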
@@ -1599,8 +1597,8 @@ void FakeQuantize::executeBinarization(const std::unique_ptrgetMemoryPtr(); auto dstMemory = getChildEdgeAt(0)->getMemoryPtr(); - auto src = reinterpret_cast(srcMemory->GetPtr()); - auto dst = reinterpret_cast(dstMemory->GetPtr()); + auto src = reinterpret_cast(srcMemory->GetData()); + auto dst = reinterpret_cast(dstMemory->GetData()); auto thresholds = reinterpret_cast(internalBlobMemory[0]->GetData()); auto output_mask = reinterpret_cast(internalBlobMemory[1]->GetData()); @@ -1641,8 +1639,8 @@ void FakeQuantize::executeQuantization(const std::unique_ptrgetMemoryPtr(); auto dstMemory = getChildEdgeAt(0)->getMemoryPtr(); - auto src = reinterpret_cast(srcMemory->GetPtr()); - auto dst = reinterpret_cast(dstMemory->GetPtr()); + auto src = reinterpret_cast(srcMemory->GetData()); + auto dst = reinterpret_cast(dstMemory->GetData()); auto& srcDesc = srcMemory->getDesc(); auto srcDims = srcDesc.getShape().getStaticDims(); @@ -1838,9 +1836,8 @@ void FakeQuantize::initializePostOpDataLegacy(const VectorDims &dims, const size void FakeQuantize::appendMemory(const size_t dataSize, const void *data, MemoryPtr &memPtr, std::vector& postOpsMem) { if (!memPtr) { - memPtr.reset(new Memory(getEngine())); DnnlBlockedMemoryDesc memoryDesc(Precision::FP32, {dataSize}); - memPtr->Create(memoryDesc, data); + memPtr.reset(new Memory(getEngine(), memoryDesc, data)); postOpsMem.push_back(memPtr); } diff --git a/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp b/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp index 2ec558f5740388..f48d337dfce16f 100644 --- a/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp +++ b/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp @@ -883,7 +883,7 @@ bool FullyConnected::useSparseWeightsDecompression() { if (blb == nullptr) IE_THROW() << "Cannot get const blob for node " << getName() << "."; - auto weightsData = reinterpret_cast(blb->GetPtr()); + auto weightsData = reinterpret_cast(blb->GetData()); auto elementsCount = blb->GetDescWithType()->getPaddedElementsCount(); size_t zerosCounts = 0; for (size_t i = 0; i < elementsCount; i++) { diff --git a/src/plugins/intel_cpu/src/nodes/gather.cpp b/src/plugins/intel_cpu/src/nodes/gather.cpp index 21720918ab91c0..6e7c9e8d615126 100644 --- a/src/plugins/intel_cpu/src/nodes/gather.cpp +++ b/src/plugins/intel_cpu/src/nodes/gather.cpp @@ -62,7 +62,7 @@ class GatherShapeInfer : public ShapeInferEmptyPads { IE_THROW() << "Unsupported precision " << data_dependency.at(GATHER_AXIS)->getDesc().getPrecision() << " for axis tensor."; } - m_axis = reinterpret_cast(data_dependency.at(GATHER_AXIS)->GetPtr())[0]; + m_axis = reinterpret_cast(data_dependency.at(GATHER_AXIS)->GetData())[0]; } if (m_axis < 0) @@ -303,7 +303,7 @@ bool Gather::needPrepareParams() const { } bool result = inputShapesModified(); if (!isAxisInputConst) - result = result || axis != (reinterpret_cast(getParentEdgeAt(GATHER_AXIS)->getMemoryPtr()->GetPtr()))[0]; + result = result || axis != (reinterpret_cast(getParentEdgeAt(GATHER_AXIS)->getMemoryPtr()->GetData()))[0]; return result; } @@ -318,7 +318,7 @@ void Gather::prepareParams() { THROW_ERROR << " has unidentified preferable primitive descriptor."; if (!isAxisInputConst) { - axis = (reinterpret_cast(getParentEdgeAt(GATHER_AXIS)->getMemoryPtr()->GetPtr()))[0]; + axis = (reinterpret_cast(getParentEdgeAt(GATHER_AXIS)->getMemoryPtr()->GetData()))[0]; if (axis < 0) axis += dataSrcRank; if (axis < 0 || axis >= dataSrcRank || batchDims > axis) @@ -365,9 +365,9 @@ void Gather::prepareParams() { void 
Gather::execute(dnnl::stream strm) { #if defined(OPENVINO_ARCH_X86_64) if (jitKernel && jitKernel->isSupportedConfiguration(afterAxisSize)) { - const void* srcIndices = getParentEdgeAt(GATHER_INDICES)->getMemoryPtr()->GetPtr(); - const void* srcData = getParentEdgeAt(GATHER_DATA)->getMemoryPtr()->GetPtr(); - uint8_t* dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); + const void* srcIndices = getParentEdgeAt(GATHER_INDICES)->getMemoryPtr()->GetData(); + const void* srcData = getParentEdgeAt(GATHER_DATA)->getMemoryPtr()->GetData(); + uint8_t* dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetData()); const uint64_t dataElPerVec = jitKernel->getDataElPerVec(); @@ -421,9 +421,9 @@ void Gather::execute(dnnl::stream strm) { void Gather::executeDynamicImpl(dnnl::stream strm) { #if defined(OPENVINO_ARCH_X86_64) if (jitKernel && jitKernel->isSupportedConfiguration(afterAxisSize)) { - const void* srcIndices = getParentEdgeAt(GATHER_INDICES)->getMemoryPtr()->GetPtr(); - const void* srcData = getParentEdgeAt(GATHER_DATA)->getMemoryPtr()->GetPtr(); - uint8_t* dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); + const void* srcIndices = getParentEdgeAt(GATHER_INDICES)->getMemoryPtr()->GetData(); + const void* srcData = getParentEdgeAt(GATHER_DATA)->getMemoryPtr()->GetData(); + uint8_t* dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetData()); const uint64_t dataElPerVec = jitKernel->getDataElPerVec(); @@ -548,9 +548,9 @@ void Gather::initShortParams(threadExecParams& p, const uint64_t start) { } void Gather::execReference() { - const int32_t* srcIndices = reinterpret_cast(getParentEdgeAt(GATHER_INDICES)->getMemoryPtr()->GetPtr()); - const uint8_t* srcData = reinterpret_cast(getParentEdgeAt(GATHER_DATA)->getMemoryPtr()->GetPtr()); - uint8_t* dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); + const int32_t* srcIndices = reinterpret_cast(getParentEdgeAt(GATHER_INDICES)->getMemoryPtr()->GetData()); + const uint8_t* srcData = reinterpret_cast(getParentEdgeAt(GATHER_DATA)->getMemoryPtr()->GetData()); + uint8_t* dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetData()); const size_t dstAfterBatchSize = betweenBatchAndAxisSize * specIdxAndAfterAxSizeB; parallel_for2d(beforeBatchSize, specIndicesSize, [&](const size_t b, const size_t j) { @@ -607,8 +607,7 @@ void Gather::resolveInPlaceEdges(Edge::LOOK look) { // getName() << " with type " << getTypeStr(); auto memMngr = std::make_shared(baseMemMngr, baseDim, offset); - auto newMem = std::make_shared(getEngine()); - newMem->Create(config.outConfs[outputPort].getMemDesc(), memMngr); + auto newMem = std::make_shared(getEngine(), std::unique_ptr(memMngr.get()), config.outConfs[outputPort].getMemDesc()); childEdge->resetMemoryPtr(newMem); } diff --git a/src/plugins/intel_cpu/src/nodes/gather_elements.cpp b/src/plugins/intel_cpu/src/nodes/gather_elements.cpp index 143bf5352386ef..b51a5f01d9fa78 100644 --- a/src/plugins/intel_cpu/src/nodes/gather_elements.cpp +++ b/src/plugins/intel_cpu/src/nodes/gather_elements.cpp @@ -103,9 +103,9 @@ void GatherElements::executeDynamicImpl(dnnl::stream strm) { template void GatherElements::directExecution() { - const auto *srcData = reinterpret_cast(getParentEdgeAt(dataIndex_)->getMemoryPtr()->GetPtr()); - const auto *indices = reinterpret_cast(getParentEdgeAt(indicesIndex_)->getMemoryPtr()->GetPtr()); - auto *dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); + const auto *srcData = 
reinterpret_cast(getParentEdgeAt(dataIndex_)->getMemoryPtr()->GetData()); + const auto *indices = reinterpret_cast(getParentEdgeAt(indicesIndex_)->getMemoryPtr()->GetData()); + auto *dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetData()); const int outSize = getChildEdgesAtPort(0)[0]->getMemory().GetShape().getElementsCount(); auto threadBody = [&](const int ithr, const int nthr) { diff --git a/src/plugins/intel_cpu/src/nodes/gather_nd.cpp b/src/plugins/intel_cpu/src/nodes/gather_nd.cpp index 1d827d4f8d7f1d..2834d78bf7592a 100644 --- a/src/plugins/intel_cpu/src/nodes/gather_nd.cpp +++ b/src/plugins/intel_cpu/src/nodes/gather_nd.cpp @@ -150,9 +150,9 @@ void GatherND::GatherNDExecutor::exec(const MemoryPtr& srcMemPtr, const MemoryPt } void GatherND::GatherNDExecutor::gatherBlocks(const MemoryPtr& srcMemPtr, const MemoryPtr& idxMemPtr, const MemoryPtr& dstMemPtr) { - const uint8_t* srcData = reinterpret_cast(srcMemPtr->GetPtr()); - const int32_t* indices = reinterpret_cast(idxMemPtr->GetPtr()); - uint8_t* dstData = reinterpret_cast(dstMemPtr->GetPtr()); + const uint8_t* srcData = reinterpret_cast(srcMemPtr->GetData()); + const int32_t* indices = reinterpret_cast(idxMemPtr->GetData()); + uint8_t* dstData = reinterpret_cast(dstMemPtr->GetData()); parallel_nt(0, [&](const int ithr, const int nthr) { size_t start(0lu), end(0lu); @@ -187,9 +187,9 @@ void GatherND::GatherNDExecutor::gatherBlocks(const MemoryPtr& srcMemPtr, const template void GatherND::GatherNDExecutor::gatherElementwise(const MemoryPtr& srcMemPtr, const MemoryPtr& idxMemPtr, const MemoryPtr& dstMemPtr) { - const dataType* srcData = reinterpret_cast(srcMemPtr->GetPtr()); - const int32_t* indices = reinterpret_cast(idxMemPtr->GetPtr()); - dataType* dstData = reinterpret_cast(dstMemPtr->GetPtr()); + const dataType* srcData = reinterpret_cast(srcMemPtr->GetData()); + const int32_t* indices = reinterpret_cast(idxMemPtr->GetData()); + dataType* dstData = reinterpret_cast(dstMemPtr->GetData()); parallel_nt(0, [&](const int ithr, const int nthr) { size_t start(0lu), end(0lu); diff --git a/src/plugins/intel_cpu/src/nodes/gather_tree.cpp b/src/plugins/intel_cpu/src/nodes/gather_tree.cpp index 16cec71e2043fb..777ab23cd1ce1d 100644 --- a/src/plugins/intel_cpu/src/nodes/gather_tree.cpp +++ b/src/plugins/intel_cpu/src/nodes/gather_tree.cpp @@ -141,11 +141,11 @@ GatherTree::GatherTreeExecutor::GatherTreeExecutor(const VectorDims& stepIdxDims template void GatherTree::GatherTreeExecutor::exec(const MemoryPtr& stepIdxMemPtr, const MemoryPtr& parentIdxMemPtr, const MemoryPtr& maxSeqLenMemPtr, const MemoryPtr& endTokenMemPtr, const MemoryPtr& dstMemPtr) { - const auto *stepIdx = reinterpret_cast(stepIdxMemPtr->GetPtr()); - const auto *parentIdx = reinterpret_cast(parentIdxMemPtr->GetPtr()); - const auto *maxSeqLen = reinterpret_cast(maxSeqLenMemPtr->GetPtr()); - const auto endToken = (reinterpret_cast(endTokenMemPtr->GetPtr()))[0]; - auto *finalIdx = reinterpret_cast(dstMemPtr->GetPtr()); + const auto *stepIdx = reinterpret_cast(stepIdxMemPtr->GetData()); + const auto *parentIdx = reinterpret_cast(parentIdxMemPtr->GetData()); + const auto *maxSeqLen = reinterpret_cast(maxSeqLenMemPtr->GetData()); + const auto endToken = (reinterpret_cast(endTokenMemPtr->GetData()))[0]; + auto *finalIdx = reinterpret_cast(dstMemPtr->GetData()); bool incorrectResult = false; parallel_for2d(batchSize, beamWidth, [&](size_t batch, size_t beam) { diff --git a/src/plugins/intel_cpu/src/nodes/generate_proposals.cpp 
b/src/plugins/intel_cpu/src/nodes/generate_proposals.cpp index 9ec70581699304..121686bad25fa8 100644 --- a/src/plugins/intel_cpu/src/nodes/generate_proposals.cpp +++ b/src/plugins/intel_cpu/src/nodes/generate_proposals.cpp @@ -362,10 +362,10 @@ void GenerateProposals::execute(dnnl::stream strm) { } // Prepare memory - const float *p_deltas_item = reinterpret_cast(getParentEdgeAt(INPUT_DELTAS)->getMemoryPtr()->GetPtr()); - const float *p_scores_item = reinterpret_cast(getParentEdgeAt(INPUT_SCORES)->getMemoryPtr()->GetPtr()); - const float *p_anchors_item = reinterpret_cast(getParentEdgeAt(INPUT_ANCHORS)->getMemoryPtr()->GetPtr()); - const float *p_img_info_cpu = reinterpret_cast(getParentEdgeAt(INPUT_IM_INFO)->getMemoryPtr()->GetPtr()); + const float *p_deltas_item = reinterpret_cast(getParentEdgeAt(INPUT_DELTAS)->getMemoryPtr()->GetData()); + const float *p_scores_item = reinterpret_cast(getParentEdgeAt(INPUT_SCORES)->getMemoryPtr()->GetData()); + const float *p_anchors_item = reinterpret_cast(getParentEdgeAt(INPUT_ANCHORS)->getMemoryPtr()->GetData()); + const float *p_img_info_cpu = reinterpret_cast(getParentEdgeAt(INPUT_IM_INFO)->getMemoryPtr()->GetData()); const int anchors_num = scoreDims[1]; @@ -453,9 +453,9 @@ void GenerateProposals::execute(dnnl::stream strm) { } // copy to out memory redefineOutputMemory({VectorDims{total_num_rois, 4}, VectorDims{total_num_rois}, VectorDims{batch_size}}); - float *p_roi_item = reinterpret_cast(getChildEdgesAtPort(OUTPUT_ROIS)[0]->getMemoryPtr()->GetPtr()); - float *p_roi_score_item = reinterpret_cast(getChildEdgesAtPort(OUTPUT_SCORES)[0]->getMemoryPtr()->GetPtr()); - uint8_t* p_roi_num_item = reinterpret_cast(getChildEdgesAtPort(OUTPUT_ROI_NUM)[0]->getMemoryPtr()->GetPtr()); + float *p_roi_item = reinterpret_cast(getChildEdgesAtPort(OUTPUT_ROIS)[0]->getMemoryPtr()->GetData()); + float *p_roi_score_item = reinterpret_cast(getChildEdgesAtPort(OUTPUT_SCORES)[0]->getMemoryPtr()->GetData()); + uint8_t* p_roi_num_item = reinterpret_cast(getChildEdgesAtPort(OUTPUT_ROI_NUM)[0]->getMemoryPtr()->GetData()); memcpy(p_roi_item, &roi_item[0], roi_item.size() * sizeof(float)); memcpy(p_roi_score_item, &score_item[0], score_item.size() * sizeof(float)); memcpy(p_roi_num_item, &roi_num[0], getChildEdgesAtPort(OUTPUT_ROI_NUM)[0]->getMemoryPtr()->GetSize()); diff --git a/src/plugins/intel_cpu/src/nodes/grid_sample.cpp b/src/plugins/intel_cpu/src/nodes/grid_sample.cpp index f60dd82e1c811b..fd5894429bda42 100644 --- a/src/plugins/intel_cpu/src/nodes/grid_sample.cpp +++ b/src/plugins/intel_cpu/src/nodes/grid_sample.cpp @@ -262,9 +262,9 @@ void GridSample::prepareParams() { } void GridSample::execute(dnnl::stream strm) { - const void* srcData = getParentEdgeAt(IN_DATA)->getMemoryPtr()->GetPtr(); - const uint8_t* gridData = reinterpret_cast(getParentEdgeAt(IN_GRID)->getMemoryPtr()->GetPtr()); - uint8_t* dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); + const void* srcData = getParentEdgeAt(IN_DATA)->getMemoryPtr()->GetData(); + const uint8_t* gridData = reinterpret_cast(getParentEdgeAt(IN_GRID)->getMemoryPtr()->GetData()); + uint8_t* dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetData()); auto threadBody = [&](const int ithr, const int nthr) { const auto& p = execParamsPerThread[ithr]; diff --git a/src/plugins/intel_cpu/src/nodes/grn.cpp b/src/plugins/intel_cpu/src/nodes/grn.cpp index 44fe953d601027..0c1bdfd7632e8e 100644 --- a/src/plugins/intel_cpu/src/nodes/grn.cpp +++ b/src/plugins/intel_cpu/src/nodes/grn.cpp @@ -94,8 +94,8 
@@ void GRN::executeDynamicImpl(dnnl::stream strm) { } void GRN::execute(dnnl::stream strm) { - const float* src_data = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); - float* dst_data = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); + const float* src_data = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetData()); + float* dst_data = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetData()); parallel_for3d(N, H, W, [&](int b, int h, int w) { double variance = 0; diff --git a/src/plugins/intel_cpu/src/nodes/if.cpp b/src/plugins/intel_cpu/src/nodes/if.cpp index a2388fb5772f6d..54b2aaf55190fc 100644 --- a/src/plugins/intel_cpu/src/nodes/if.cpp +++ b/src/plugins/intel_cpu/src/nodes/if.cpp @@ -29,7 +29,7 @@ void If::PortMapHelper::execute(dnnl::stream& strm) { // after subgraph inference we should redefine out memory of 'If' redefineTo(); - cpu_memcpy(dstMemPtrs.front()->GetPtr(), srcMemPtr->GetPtr(), size); + cpu_memcpy(dstMemPtrs.front()->GetData(), srcMemPtr->GetData(), size); } void If::PortMapHelper::redefineTo() { @@ -217,7 +217,7 @@ std::deque If::getToMemories(const Node* node, const size_t port) con } void If::execute(dnnl::stream strm) { - const bool condition = static_cast((reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()))[0]); + const bool condition = static_cast((reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetData()))[0]); auto& beforeMappers = condition ? beforeThenMappers : beforeElseMappers; auto& afterMappers = condition ? afterThenMappers : afterElseMappers; diff --git a/src/plugins/intel_cpu/src/nodes/input.cpp b/src/plugins/intel_cpu/src/nodes/input.cpp index 3ad3275f450c38..560c2d8e44a70f 100644 --- a/src/plugins/intel_cpu/src/nodes/input.cpp +++ b/src/plugins/intel_cpu/src/nodes/input.cpp @@ -266,22 +266,21 @@ void Input::cloneBlobIfRequired() { } auto cloneBlob = [&, this] () { - Memory memory{ getEngine() }; + MemoryPtr memory; // CVS-74980 // oneDNN always allocate 1byte for element type with bitWidth < 8 (u4,u1...) 
// but ngraph Constant uses actual bitWidth for data storage allocation // in that case we make a copy to avoid overflow if (constOp->get_byte_size() >= memDesc.getCurrentMemSize()) { - memory.Create(memDesc, constOp->get_data_ptr()); + memory = MemoryPtr(new Memory(getEngine(), memDesc, constOp->get_data_ptr())); } else { - memory.Create(memDesc); - memcpy(memory.GetPtr(), constOp->get_data_ptr(), constOp->get_byte_size()); + memory = MemoryPtr(new Memory(getEngine(), memDesc)); + memcpy(memory->GetData(), constOp->get_data_ptr(), constOp->get_byte_size()); } - MemoryPtr ptr = MemoryPtr(new Memory(getEngine())); - ptr->Create(memDesc); - ptr->SetData(memory, needFlushDenormalsToZero); + MemoryPtr ptr = MemoryPtr(new Memory(getEngine(), memDesc)); + ptr->SetData(*memory.get(), needFlushDenormalsToZero); return ptr; }; @@ -366,15 +365,14 @@ void Input::cloneBlobIfRequired() { auto weightCache = context->getWeightsCache(); if (weightCache) { MemoryPtr ptr = *weightCache->findOrCreate(blobKey(), cloneBlob); - memoryPtr = std::const_pointer_cast(ptr); + memoryPtr = std::const_pointer_cast(ptr); // IRs already have all subnormals flushed to zero, but in // read_model scenario with directly loaded original model still can have subnormals } else if (isBlobAligned() && (!needFlushDenormalsToZero || !hasSubnormals()) && !isWA()) { - auto ptr = new Memory(getEngine()); - ptr->Create(memDesc, constOp->get_data_ptr()); + auto ptr = new Memory(getEngine(), memDesc, constOp->get_data_ptr()); memoryPtr = MemoryCPtr(ptr); } else { - memoryPtr = std::const_pointer_cast(cloneBlob()); + memoryPtr = std::const_pointer_cast(cloneBlob()); } } diff --git a/src/plugins/intel_cpu/src/nodes/interaction.cpp b/src/plugins/intel_cpu/src/nodes/interaction.cpp index 47f4afdeb47cb9..2f8094d2b06c49 100644 --- a/src/plugins/intel_cpu/src/nodes/interaction.cpp +++ b/src/plugins/intel_cpu/src/nodes/interaction.cpp @@ -238,10 +238,10 @@ static inline void flat_triangle(const uint8_t* in, uint8_t* out, size_t size, s void Interaction::execRef(dnnl::stream strm) { using namespace dnnl; - uint8_t* outFeaturesPtr = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); + uint8_t* outFeaturesPtr = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetData()); std::vector inputPtrs(inputSizes); for (uint32_t n = 0; n < inputSizes; n++) { - auto inPtr = reinterpret_cast(getParentEdgeAt(n)->getMemoryPtr()->GetPtr()); + auto inPtr = reinterpret_cast(getParentEdgeAt(n)->getMemoryPtr()->GetData()); inputPtrs[n] = inPtr; } std::unordered_map mem_ags{{DNNL_ARG_SRC, inputMemPtr->GetPrimitive()}, @@ -249,10 +249,10 @@ void Interaction::execRef(dnnl::stream strm) { {DNNL_ARG_DST, outputMemPtr->GetPrimitive()}}; float* scales = fqScales.empty() ? 
nullptr : fqScales.data(); for (int64_t start = 0; start < static_cast(batchSize); start++) { - cat(reinterpret_cast(inputMemPtr->GetPtr()), inputPtrs, featureSizes, start, dataPrecision.size()); + cat(reinterpret_cast(inputMemPtr->GetData()), inputPtrs, featureSizes, start, dataPrecision.size()); prim.execute(strm, mem_ags); - flat_triangle(reinterpret_cast(outputMemPtr->GetPtr()), - reinterpret_cast(flatMemPtr->GetPtr()), + flat_triangle(reinterpret_cast(outputMemPtr->GetData()), + reinterpret_cast(flatMemPtr->GetData()), inputSizes, dataPrecision.size()); // in1 dense feature @@ -266,7 +266,7 @@ void Interaction::execRef(dnnl::stream strm) { } if (moveInteractKernel) { jit_move_scale_call_args interArgs; - interArgs.p_in = flatMemPtr->GetPtr(); + interArgs.p_in = flatMemPtr->GetData(); interArgs.p_out = outFeaturesPtr + (start * outputFeaturesLen + featureSize) * outputDataType.size(); interArgs.p_scales = scales; (*moveInteractKernel)(&interArgs); @@ -306,8 +306,7 @@ void Interaction::prepareParams() { featureSizes.assign(inputSizes, featureSize); auto initMemoryPtr = [&](const InferenceEngine::Precision &prc, const intel_cpu::Shape& shape, MemoryPtr& ptr) { - ptr = std::make_shared(getEngine()); - ptr->Create(intel_cpu::DnnlBlockedMemoryDesc(prc, shape)); + ptr = std::make_shared(getEngine(), intel_cpu::DnnlBlockedMemoryDesc(prc, shape)); }; initMemoryPtr(dataPrecision, intel_cpu::Shape{inputSizes, featureSize}, inputMemPtr); initMemoryPtr(dataPrecision, intel_cpu::Shape{inputShapes.size(), inputShapes.size()}, outputMemPtr); diff --git a/src/plugins/intel_cpu/src/nodes/interpolate.cpp b/src/plugins/intel_cpu/src/nodes/interpolate.cpp index 95d75a3010df83..0101cd307e7b23 100644 --- a/src/plugins/intel_cpu/src/nodes/interpolate.cpp +++ b/src/plugins/intel_cpu/src/nodes/interpolate.cpp @@ -2182,7 +2182,7 @@ bool Interpolate::needShapeInfer() const { if (lastScales.empty()) { return true; } - const float *scales = reinterpret_cast(getParentEdgesAtPort(get_scale_id())[0]->getMemory().GetPtr()); + const float *scales = reinterpret_cast(getParentEdgesAtPort(get_scale_id())[0]->getMemory().GetData()); for (size_t i = 0; i < lastScales.size(); i++) { if (lastScales[i] != scales[i]) { return true; @@ -2192,7 +2192,7 @@ bool Interpolate::needShapeInfer() const { if (lastSizes.empty()) { return true; } - const int32_t *sizes = reinterpret_cast(getParentEdgesAtPort(TARGET_SHAPE_ID)[0]->getMemory().GetPtr()); + const int32_t *sizes = reinterpret_cast(getParentEdgesAtPort(TARGET_SHAPE_ID)[0]->getMemory().GetData()); for (size_t i = 0; i < lastSizes.size(); i++) { if (sizes[i] != lastSizes[i]) { return true; @@ -2208,10 +2208,10 @@ void Interpolate::executeDynamicImpl(dnnl::stream strm) { const size_t port = shapeCalcMode == InterpolateShapeCalcMode::sizes ? 
TARGET_SHAPE_ID : get_scale_id(); const auto &memory = getParentEdgesAtPort(port)[0]->getMemory(); if (shapeCalcMode == InterpolateShapeCalcMode::scales) { - const float *scales = reinterpret_cast(memory.GetPtr()); + const float *scales = reinterpret_cast(memory.GetData()); lastScales.assign(scales, scales + memory.getDesc().getShape().getElementsCount()); } else { - const int32_t *sizes = reinterpret_cast(memory.GetPtr()); + const int32_t *sizes = reinterpret_cast(memory.GetData()); lastSizes.assign(sizes, sizes + memory.getDesc().getShape().getElementsCount()); } } @@ -2288,7 +2288,7 @@ void Interpolate::prepareParams() { if (shapeCalcMode == InterpolateShapeCalcMode::scales) { if (!isScaleConstant) { const auto& scalesMem = getParentEdgesAtPort(get_scale_id())[0]->getMemory(); - const float* scalesData = reinterpret_cast(scalesMem.GetPtr()); + const float* scalesData = reinterpret_cast(scalesMem.GetData()); scales.assign(scalesData, scalesData + scalesMem.getStaticDims()[0]); } } @@ -2447,7 +2447,7 @@ void Interpolate::execute(dnnl::stream strm) { auto srcMemPtr = getParentEdgeAt(DATA_ID)->getMemoryPtr(); if (execPtr) { - uint8_t *dst_data = reinterpret_cast(dstMemPtr->GetPtr()); + uint8_t *dst_data = reinterpret_cast(dstMemPtr->GetData()); const uint8_t *src_data_origin = reinterpret_cast(srcMemPtr->GetData()); const uint8_t *src_data = nullptr; std::vector srcPadded; diff --git a/src/plugins/intel_cpu/src/nodes/log_softmax.cpp b/src/plugins/intel_cpu/src/nodes/log_softmax.cpp index d69eac583eaae8..6352ecdbb35854 100644 --- a/src/plugins/intel_cpu/src/nodes/log_softmax.cpp +++ b/src/plugins/intel_cpu/src/nodes/log_softmax.cpp @@ -87,8 +87,8 @@ void LogSoftmax::executeDynamicImpl(dnnl::stream strm) { } void LogSoftmax::execute(dnnl::stream strm) { - const float *srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); - float* dstData = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); + const float *srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetData()); + float* dstData = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetData()); if (isLastDim) { parallel_for(axisStep, [&](size_t i) { diff --git a/src/plugins/intel_cpu/src/nodes/mathematics.cpp b/src/plugins/intel_cpu/src/nodes/mathematics.cpp index bb639ac31b6cbd..1ff212f3a382ff 100644 --- a/src/plugins/intel_cpu/src/nodes/mathematics.cpp +++ b/src/plugins/intel_cpu/src/nodes/mathematics.cpp @@ -72,8 +72,8 @@ void Math::executeDynamicImpl(dnnl::stream strm) { void Math::execute(dnnl::stream strm) { size_t dataSize = getChildEdgesAtPort(0)[0]->getMemory().GetShape().getElementsCount(); - const float *src_data = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); - float* dst_data = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); + const float *src_data = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetData()); + float* dst_data = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetData()); switch (getAlgorithm()) { case Algorithm::MathAbs: diff --git a/src/plugins/intel_cpu/src/nodes/matrix_nms.cpp b/src/plugins/intel_cpu/src/nodes/matrix_nms.cpp index fd7f2ad6d08afd..d10dedfffb534f 100644 --- a/src/plugins/intel_cpu/src/nodes/matrix_nms.cpp +++ b/src/plugins/intel_cpu/src/nodes/matrix_nms.cpp @@ -302,8 +302,8 @@ void MatrixNms::executeDynamicImpl(dnnl::stream strm) { } void MatrixNms::execute(dnnl::stream strm) { - const float* boxes = reinterpret_cast(getParentEdgeAt(NMS_BOXES)->getMemoryPtr()->GetPtr()); - const float* scores 
= reinterpret_cast(getParentEdgeAt(NMS_SCORES)->getMemoryPtr()->GetPtr()); + const float* boxes = reinterpret_cast(getParentEdgeAt(NMS_BOXES)->getMemoryPtr()->GetData()); + const float* scores = reinterpret_cast(getParentEdgeAt(NMS_SCORES)->getMemoryPtr()->GetData()); InferenceEngine::parallel_for2d(m_numBatches, m_numClasses, [&](size_t batchIdx, size_t classIdx) { if (classIdx == static_cast(m_backgroundClass)) { @@ -380,9 +380,9 @@ void MatrixNms::execute(dnnl::stream strm) { size_t totalBox = std::accumulate(m_numPerBatch.begin(), m_numPerBatch.end(), size_t(0)); redefineOutputMemory({{totalBox, 6}, {totalBox, 1}, {m_numBatches}}); } - float* selectedOutputs = reinterpret_cast(selectedOutputsMemPtr->GetPtr()); - int* selectedIndices = reinterpret_cast(selectedIndicesMemPtr->GetPtr()); - int* validOutputs = reinterpret_cast(validOutputsMemPtr->GetPtr()); + float* selectedOutputs = reinterpret_cast(selectedOutputsMemPtr->GetData()); + int* selectedIndices = reinterpret_cast(selectedIndicesMemPtr->GetData()); + int* validOutputs = reinterpret_cast(validOutputsMemPtr->GetData()); for (size_t i = 0; i < m_numPerBatch.size(); i++) validOutputs[i] = static_cast(m_numPerBatch[i]); diff --git a/src/plugins/intel_cpu/src/nodes/memory.cpp b/src/plugins/intel_cpu/src/nodes/memory.cpp index 20cb3f3b961c4a..ffb485e790ea14 100644 --- a/src/plugins/intel_cpu/src/nodes/memory.cpp +++ b/src/plugins/intel_cpu/src/nodes/memory.cpp @@ -106,7 +106,7 @@ bool MemoryInput::isSupportedOperation(const std::shared_ptr } MemoryInput::MemoryInput(const std::shared_ptr& op, const GraphContext::CPtr ctx) - : Input(op, ctx), MemoryNode(op), dataStore(new Memory{ctx->getEngine()}) { + : Input(op, ctx), MemoryNode(op) { std::string errorMessage; if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; @@ -119,7 +119,7 @@ MemoryInput::MemoryInput(const std::shared_ptr& op, const GraphCon void MemoryInput::createPrimitive() { Input::createPrimitive(); - dataStore->Create(getChildEdgeAt(0)->getMemory().getDesc()); + dataStore = std::make_shared(getEngine(), getChildEdgeAt(0)->getMemory().getDesc()); // default memory state is zero filled if (dataStore->getDesc().hasDefinedMaxSize()) @@ -133,9 +133,9 @@ void MemoryInput::createPrimitive() { * @param src source memory object */ inline -static void simple_copy(const Memory& dst, const Memory& src) { - auto srcPtr = static_cast(src.GetPtr()); - auto dstPtr = static_cast(dst.GetPtr()); +static void simple_copy(const IMemory& dst, const IMemory& src) { + auto srcPtr = static_cast(src.GetData()); + auto dstPtr = static_cast(dst.GetData()); if (src.GetDataType() == dst.GetDataType()) { auto srcSizeInByte = src.GetSize(); auto dstSizeInByte = dst.GetSize(); @@ -157,7 +157,7 @@ MemoryPtr MemoryInput::getStore() { return dataStore; } -void MemoryInput::storeState(const Memory &new_state) { +void MemoryInput::storeState(const IMemory &new_state) { // TODO: Should be next one call: // dataStore.SetData(new_state, false); // But because of performance reason we use simple manual copy diff --git a/src/plugins/intel_cpu/src/nodes/memory.hpp b/src/plugins/intel_cpu/src/nodes/memory.hpp index 4414cb0dde68fa..2035f3e8651064 100644 --- a/src/plugins/intel_cpu/src/nodes/memory.hpp +++ b/src/plugins/intel_cpu/src/nodes/memory.hpp @@ -101,7 +101,7 @@ class MemoryInput : public Input, public MemoryNode { void createPrimitive() override; void setInputNode(Node* node) override {} - void storeState(const Memory& mem); + void storeState(const IMemory& mem); MemoryPtr 
getStore(); private: MemoryPtr dataStore; diff --git a/src/plugins/intel_cpu/src/nodes/mha.cpp b/src/plugins/intel_cpu/src/nodes/mha.cpp index 5be06b7ef941e4..9e0bb32812d5e6 100644 --- a/src/plugins/intel_cpu/src/nodes/mha.cpp +++ b/src/plugins/intel_cpu/src/nodes/mha.cpp @@ -1207,11 +1207,11 @@ void MHA::callBrgemm(brgemmCtx& ctx, std::unique_ptr& brgKernel template void MHA::mhaImpl() { - const uint8_t* pTranspose0In0 = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); - const uint8_t* pTranspose1In0 = reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->GetPtr()); - const float* pAddIn1 = reinterpret_cast(getParentEdgeAt(2)->getMemoryPtr()->GetPtr()); - const uint8_t* pTranspose2In0 = reinterpret_cast(getParentEdgeAt(3)->getMemoryPtr()->GetPtr()); - uint8_t* pout = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); + const uint8_t* pTranspose0In0 = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetData()); + const uint8_t* pTranspose1In0 = reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->GetData()); + const float* pAddIn1 = reinterpret_cast(getParentEdgeAt(2)->getMemoryPtr()->GetData()); + const uint8_t* pTranspose2In0 = reinterpret_cast(getParentEdgeAt(3)->getMemoryPtr()->GetData()); + uint8_t* pout = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetData()); auto outPrcSize = getOriginalOutputPrecisionAtPort(0).size(); diff --git a/src/plugins/intel_cpu/src/nodes/multiclass_nms.cpp b/src/plugins/intel_cpu/src/nodes/multiclass_nms.cpp index c0c6429d4fa44a..28e94264d143fb 100644 --- a/src/plugins/intel_cpu/src/nodes/multiclass_nms.cpp +++ b/src/plugins/intel_cpu/src/nodes/multiclass_nms.cpp @@ -203,8 +203,8 @@ void MultiClassNms::executeDynamicImpl(dnnl::stream strm) { } void MultiClassNms::execute(dnnl::stream strm) { - const float* boxes = reinterpret_cast(getParentEdgeAt(NMS_BOXES)->getMemoryPtr()->GetPtr()); - const float* scores = reinterpret_cast(getParentEdgeAt(NMS_SCORES)->getMemoryPtr()->GetPtr()); + const float* boxes = reinterpret_cast(getParentEdgeAt(NMS_BOXES)->getMemoryPtr()->GetData()); + const float* scores = reinterpret_cast(getParentEdgeAt(NMS_SCORES)->getMemoryPtr()->GetData()); auto dims_boxes = getParentEdgeAt(NMS_BOXES)->getMemory().getStaticDims(); auto dims_scores = getParentEdgeAt(NMS_SCORES)->getMemory().getStaticDims(); @@ -225,7 +225,7 @@ void MultiClassNms::execute(dnnl::stream strm) { int* roisnum = nullptr; VectorDims roisnumStrides; if (has_roinum) { - roisnum = reinterpret_cast(getParentEdgeAt(NMS_ROISNUM)->getMemoryPtr()->GetPtr()); + roisnum = reinterpret_cast(getParentEdgeAt(NMS_ROISNUM)->getMemoryPtr()->GetData()); roisnumStrides = getParentEdgeAt(NMS_ROISNUM)->getMemory().GetDescWithType()->getStrides(); } @@ -328,9 +328,9 @@ void MultiClassNms::execute(dnnl::stream strm) { size_t totalBox = std::accumulate(m_selected_num.begin(), m_selected_num.end(), size_t(0)); redefineOutputMemory({{totalBox, 6}, {totalBox, 1}, {m_numBatches}}); } - int* selected_indices = reinterpret_cast(selectedIndicesMemPtr->GetPtr()); - float* selected_outputs = reinterpret_cast(selectedOutputsMemPtr->GetPtr()); - int* selected_num = reinterpret_cast(validOutputsMemPtr->GetPtr()); + int* selected_indices = reinterpret_cast(selectedIndicesMemPtr->GetData()); + float* selected_outputs = reinterpret_cast(selectedOutputsMemPtr->GetData()); + int* selected_num = reinterpret_cast(validOutputsMemPtr->GetData()); auto _flattened_index = [](int batch_idx, int box_idx, int num_box) { return batch_idx * num_box + box_idx; diff --git 
a/src/plugins/intel_cpu/src/nodes/mvn.cpp b/src/plugins/intel_cpu/src/nodes/mvn.cpp index 0be28a12fb2fa9..6b7031f5cc2794 100644 --- a/src/plugins/intel_cpu/src/nodes/mvn.cpp +++ b/src/plugins/intel_cpu/src/nodes/mvn.cpp @@ -1462,8 +1462,8 @@ void MVN::execute(dnnl::stream strm) { auto srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); if (execPtr) { - uint8_t *dst_data = reinterpret_cast(dstMemPtr->GetPtr()); - uint8_t *src_data = reinterpret_cast(srcMemPtr->GetPtr()); + uint8_t *dst_data = reinterpret_cast(dstMemPtr->GetData()); + uint8_t *src_data = reinterpret_cast(srcMemPtr->GetData()); execPtr->exec(src_data, dst_data, postOpsDataPtrs.data()); } else if (aclExecPtr) { aclExecPtr->exec({srcMemPtr}, {dstMemPtr}, postOpsDataPtrs.data()); diff --git a/src/plugins/intel_cpu/src/nodes/ngram.cpp b/src/plugins/intel_cpu/src/nodes/ngram.cpp index 53192ef396e930..a54f7ab07d6efc 100644 --- a/src/plugins/intel_cpu/src/nodes/ngram.cpp +++ b/src/plugins/intel_cpu/src/nodes/ngram.cpp @@ -115,7 +115,7 @@ void Ngram::prepareParams() { template std::vector Ngram::computeBatchLenghts() { - auto* srcIndices = reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->GetPtr()); + auto* srcIndices = reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->GetData()); std::vector batchLenghts{0}; batchLenghts.reserve(numIdces + 1); @@ -130,8 +130,8 @@ std::vector Ngram::computeBatchLenghts() { } void Ngram::execute(dnnl::stream strm) { - auto* srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); - auto* dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); + auto* srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetData()); + auto* dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetData()); std::vector batchLenghts; if (idcesPrecision == InferenceEngine::Precision::I32) { diff --git a/src/plugins/intel_cpu/src/nodes/non_max_suppression.cpp b/src/plugins/intel_cpu/src/nodes/non_max_suppression.cpp index 86a502e4333981..0ad8af40366d4b 100644 --- a/src/plugins/intel_cpu/src/nodes/non_max_suppression.cpp +++ b/src/plugins/intel_cpu/src/nodes/non_max_suppression.cpp @@ -723,20 +723,20 @@ void NonMaxSuppression::createJitKernel() { void NonMaxSuppression::executeDynamicImpl(dnnl::stream strm) { if (hasEmptyInputTensors() || (inputShapes.size() > NMS_MAXOUTPUTBOXESPERCLASS && - reinterpret_cast(getParentEdgeAt(NMS_MAXOUTPUTBOXESPERCLASS)->getMemoryPtr()->GetPtr())[0] == 0)) { + reinterpret_cast(getParentEdgeAt(NMS_MAXOUTPUTBOXESPERCLASS)->getMemoryPtr()->GetData())[0] == 0)) { redefineOutputMemory({{0, 3}, {0, 3}, {1}}); - *reinterpret_cast(getChildEdgesAtPort(NMS_VALIDOUTPUTS)[0]->getMemoryPtr()->GetPtr()) = 0; + *reinterpret_cast(getChildEdgesAtPort(NMS_VALIDOUTPUTS)[0]->getMemoryPtr()->GetData()) = 0; return; } execute(strm); } void NonMaxSuppression::execute(dnnl::stream strm) { - const float *boxes = reinterpret_cast(getParentEdgeAt(NMS_BOXES)->getMemoryPtr()->GetPtr()); - const float *scores = reinterpret_cast(getParentEdgeAt(NMS_SCORES)->getMemoryPtr()->GetPtr()); + const float *boxes = reinterpret_cast(getParentEdgeAt(NMS_BOXES)->getMemoryPtr()->GetData()); + const float *scores = reinterpret_cast(getParentEdgeAt(NMS_SCORES)->getMemoryPtr()->GetData()); if (inputShapes.size() > NMS_MAXOUTPUTBOXESPERCLASS) { - maxOutputBoxesPerClass = reinterpret_cast(getParentEdgeAt(NMS_MAXOUTPUTBOXESPERCLASS)->getMemoryPtr()->GetPtr())[0]; + maxOutputBoxesPerClass = reinterpret_cast(getParentEdgeAt(NMS_MAXOUTPUTBOXESPERCLASS)->getMemoryPtr()->GetData())[0]; } 
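// The scalar reads just above and below this point share one pattern: an optional
// attribute input is fetched through the renamed GetData() accessor and its element 0
// is taken, keeping the current member value as the default when the port is absent.
// Purely as an illustration of that pattern for the float-valued thresholds below
// (the lambda name and signature are hypothetical; only the
// getParentEdgeAt()/getMemoryPtr()->GetData() calls mirror the surrounding code),
// such a read could be expressed inside execute() as:
auto readScalarOr = [this](size_t port, float fallback) -> float {
    if (inputShapes.size() <= port)
        return fallback;  // optional input not supplied, keep the existing value
    return reinterpret_cast<const float*>(getParentEdgeAt(port)->getMemoryPtr()->GetData())[0];
};
// e.g. iouThreshold = readScalarOr(NMS_IOUTHRESHOLD, iouThreshold);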
maxOutputBoxesPerClass = std::min(maxOutputBoxesPerClass, numBoxes); @@ -746,13 +746,13 @@ void NonMaxSuppression::execute(dnnl::stream strm) { } if (inputShapes.size() > NMS_IOUTHRESHOLD) - iouThreshold = reinterpret_cast(getParentEdgeAt(NMS_IOUTHRESHOLD)->getMemoryPtr()->GetPtr())[0]; + iouThreshold = reinterpret_cast(getParentEdgeAt(NMS_IOUTHRESHOLD)->getMemoryPtr()->GetData())[0]; if (inputShapes.size() > NMS_SCORETHRESHOLD) - scoreThreshold = reinterpret_cast(getParentEdgeAt(NMS_SCORETHRESHOLD)->getMemoryPtr()->GetPtr())[0]; + scoreThreshold = reinterpret_cast(getParentEdgeAt(NMS_SCORETHRESHOLD)->getMemoryPtr()->GetData())[0]; if (inputShapes.size() > NMS_SOFTNMSSIGMA) - softNMSSigma = reinterpret_cast(getParentEdgeAt(NMS_SOFTNMSSIGMA)->getMemoryPtr()->GetPtr())[0]; + softNMSSigma = reinterpret_cast(getParentEdgeAt(NMS_SOFTNMSSIGMA)->getMemoryPtr()->GetData())[0]; scale = 0.0f; if (softNMSSigma > 0.0) { scale = -0.5f / softNMSSigma; @@ -806,8 +806,8 @@ void NonMaxSuppression::execute(dnnl::stream strm) { int selectedIndicesStride = indicesMemPtr->GetDescWithType()->getStrides()[0]; - int *selectedIndicesPtr = reinterpret_cast(indicesMemPtr->GetPtr()); - float *selectedScoresPtr = reinterpret_cast(scoresMemPtr->GetPtr()); + int *selectedIndicesPtr = reinterpret_cast(indicesMemPtr->GetData()); + float *selectedScoresPtr = reinterpret_cast(scoresMemPtr->GetData()); size_t idx = 0lu; for (; idx < validOutputs; idx++) { @@ -827,7 +827,7 @@ void NonMaxSuppression::execute(dnnl::stream strm) { std::fill(selectedScoresPtr, selectedScoresPtr + (maxNumberOfBoxes - idx) * selectedIndicesStride, -1.f); } - int *valid_outputs = reinterpret_cast(getChildEdgesAtPort(NMS_VALIDOUTPUTS)[0]->getMemoryPtr()->GetPtr()); + int *valid_outputs = reinterpret_cast(getChildEdgesAtPort(NMS_VALIDOUTPUTS)[0]->getMemoryPtr()->GetData()); *valid_outputs = static_cast(validOutputs); } diff --git a/src/plugins/intel_cpu/src/nodes/non_zero.cpp b/src/plugins/intel_cpu/src/nodes/non_zero.cpp index 7704fb7da28000..ff13ba07add9df 100644 --- a/src/plugins/intel_cpu/src/nodes/non_zero.cpp +++ b/src/plugins/intel_cpu/src/nodes/non_zero.cpp @@ -133,7 +133,7 @@ void NonZero::execute(dnnl::stream strm) { template void NonZero::executeSpecified() { const T zero = 0; - const T *src = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); + const T *src = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetData()); auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); Shape inShape = getParentEdgeAt(0)->getMemory().GetShape(); size_t inRank = inShape.getRank(); @@ -150,7 +150,7 @@ void NonZero::executeSpecified() { VectorDims newDims{inRank, totalNonZeroCount}; redefineOutputMemory({newDims}); } - int* dst = reinterpret_cast(dstMemPtr->GetPtr()); + int* dst = reinterpret_cast(dstMemPtr->GetData()); if (totalNonZeroCount == 0) return; diff --git a/src/plugins/intel_cpu/src/nodes/normalize.cpp b/src/plugins/intel_cpu/src/nodes/normalize.cpp index 4989cae71a182b..93012697164f4d 100644 --- a/src/plugins/intel_cpu/src/nodes/normalize.cpp +++ b/src/plugins/intel_cpu/src/nodes/normalize.cpp @@ -937,8 +937,8 @@ void NormalizeL2::execute(dnnl::stream strm) { if (!execPtr) THROW_ERROR << "doesn't have a compiled executor."; - const uint8_t *src_ptr = reinterpret_cast(getParentEdgeAt(DATA)->getMemoryPtr()->GetPtr()); - uint8_t *dst_ptr = reinterpret_cast(getChildEdgeAt(DATA)->getMemoryPtr()->GetPtr()); + const uint8_t *src_ptr = reinterpret_cast(getParentEdgeAt(DATA)->getMemoryPtr()->GetData()); + uint8_t *dst_ptr = 
reinterpret_cast(getChildEdgeAt(DATA)->getMemoryPtr()->GetData()); execPtr->exec(src_ptr, dst_ptr, postOpsDataPtrs.data()); } diff --git a/src/plugins/intel_cpu/src/nodes/one_hot.cpp b/src/plugins/intel_cpu/src/nodes/one_hot.cpp index a3ee3b715b27df..b70655b5c2a31c 100644 --- a/src/plugins/intel_cpu/src/nodes/one_hot.cpp +++ b/src/plugins/intel_cpu/src/nodes/one_hot.cpp @@ -33,7 +33,7 @@ class OneHotShapeInfer : public ShapeInferEmptyPads { Result infer( const std::vector>& input_shapes, const std::unordered_map& data_dependency) override { - auto depth = reinterpret_cast(data_dependency.at(1)->GetPtr())[0]; + auto depth = reinterpret_cast(data_dependency.at(1)->GetData())[0]; auto result = input_shapes.front().get(); result.insert(result.begin() + m_axis, depth); @@ -132,7 +132,7 @@ OneHot::OneHot(const std::shared_ptr& op, const GraphContext::CPtr } bool OneHot::needShapeInfer() const { - const auto depthNodePtr = reinterpret_cast(getParentEdgesAtPort(1)[0]->getMemoryPtr()->GetPtr()); + const auto depthNodePtr = reinterpret_cast(getParentEdgesAtPort(1)[0]->getMemoryPtr()->GetData()); if (depth != static_cast(depthNodePtr[0])) { depth = depthNodePtr[0]; return true; @@ -162,11 +162,11 @@ void OneHot::initSupportedPrimitiveDescriptors() { template void OneHot::one_hot(size_t prefix_size, size_t suffix_size) { - const auto *src_data = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); - auto *dst_data = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); + const auto *src_data = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetData()); + auto *dst_data = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetData()); - const out_type on_value = reinterpret_cast(getParentEdgeAt(2)->getMemoryPtr()->GetPtr())[0]; - const out_type off_value = reinterpret_cast(getParentEdgeAt(3)->getMemoryPtr()->GetPtr())[0]; + const out_type on_value = reinterpret_cast(getParentEdgeAt(2)->getMemoryPtr()->GetData())[0]; + const out_type off_value = reinterpret_cast(getParentEdgeAt(3)->getMemoryPtr()->GetData())[0]; // fill the output with off_value std::size_t dst_size = prefix_size * depth * suffix_size; diff --git a/src/plugins/intel_cpu/src/nodes/pad.cpp b/src/plugins/intel_cpu/src/nodes/pad.cpp index 8baca7c5636500..28299df68609d5 100644 --- a/src/plugins/intel_cpu/src/nodes/pad.cpp +++ b/src/plugins/intel_cpu/src/nodes/pad.cpp @@ -238,7 +238,7 @@ void Pad::PadExecutor::paramsInitialization(const PadAttrs& attrs, auto fillingInParameters = [&](VectorIdxs& parameter, const size_t type, const size_t size, const int value) { - const int* ptr = reinterpret_cast(srcMemory[type]->GetPtr()); + const int* ptr = reinterpret_cast(srcMemory[type]->GetData()); parameter.resize(size); for (size_t i = 0; i < size; i++) { parameter[i] = static_cast(ptr[i]); @@ -250,7 +250,7 @@ void Pad::PadExecutor::paramsInitialization(const PadAttrs& attrs, if (params.attrs.padsEnd.empty()) fillingInParameters(params.attrs.padsEnd, PADS_END_ID, srcDims.size(), 0); if (!params.attrs.constPadValue) - params.attrs.padValue = reinterpret_cast(srcMemory[PAD_VALUE_ID]->GetPtr())[0]; + params.attrs.padValue = reinterpret_cast(srcMemory[PAD_VALUE_ID]->GetData())[0]; // pads are constant, so we can calculate new collapsing pads for first target dimensions and use it for the next // dimensions to avoid permanent identical pad calculations const size_t blockSize = srcMemPtr->getDesc().hasLayoutType(LayoutType::nCsp16c) @@ -438,7 +438,7 @@ void Pad::PadExecutor::padConstant(const MemoryPtr& srcMemPtr, const MemoryPtr& 
template void Pad::PadExecutor::padConstantCommon(const MemoryPtr& srcMemPtr, const MemoryPtr& dstMemPtr) { - T* dstData = reinterpret_cast(dstMemPtr->GetPtr()); + T* dstData = reinterpret_cast(dstMemPtr->GetData()); const T value = static_cast(params.attrs.padValue); if (zeroInputDimsCase) { const auto workAmount = dstMemPtr->GetDescWithType()->getPaddedElementsCount(); @@ -449,7 +449,7 @@ void Pad::PadExecutor::padConstantCommon(const MemoryPtr& srcMemPtr, const Memor return; } - const T* srcData = reinterpret_cast(srcMemPtr->GetPtr()); + const T* srcData = reinterpret_cast(srcMemPtr->GetData()); parallel_nt(params.nThreads, [&](const int ithr, const int nthr) { size_t start = 0, end = 0; @@ -487,8 +487,8 @@ void Pad::PadExecutor::padConstantCommon(const MemoryPtr& srcMemPtr, const Memor } void Pad::PadExecutor::padConstantZero(const MemoryPtr& srcMemPtr, const MemoryPtr& dstMemPtr) { - const uint8_t* srcData = reinterpret_cast(srcMemPtr->GetPtr()); - uint8_t* dstData = reinterpret_cast(dstMemPtr->GetPtr()); + const uint8_t* srcData = reinterpret_cast(srcMemPtr->GetData()); + uint8_t* dstData = reinterpret_cast(dstMemPtr->GetData()); parallel_nt(params.nThreads, [&](const int ithr, const int nthr) { size_t start = 0, end = 0; @@ -528,8 +528,8 @@ void Pad::PadExecutor::padConstantZero(const MemoryPtr& srcMemPtr, const MemoryP } void Pad::PadExecutor::padEdge(const MemoryPtr& srcMemPtr, const MemoryPtr& dstMemPtr) { - const uint8_t* srcData = reinterpret_cast(srcMemPtr->GetPtr()); - uint8_t* dstData = reinterpret_cast(dstMemPtr->GetPtr()); + const uint8_t* srcData = reinterpret_cast(srcMemPtr->GetData()); + uint8_t* dstData = reinterpret_cast(dstMemPtr->GetData()); parallel_nt(params.nThreads, [&](const int ithr, const int nthr) { size_t start = 0, end = 0; @@ -569,8 +569,8 @@ void Pad::PadExecutor::padEdge(const MemoryPtr& srcMemPtr, const MemoryPtr& dstM } void Pad::PadExecutor::padReflectOrSymmetric(const MemoryPtr& srcMemPtr, const MemoryPtr& dstMemPtr, const bool isSymmetric) { - const uint8_t* srcData = reinterpret_cast(srcMemPtr->GetPtr()); - uint8_t* dstData = reinterpret_cast(dstMemPtr->GetPtr()); + const uint8_t* srcData = reinterpret_cast(srcMemPtr->GetData()); + uint8_t* dstData = reinterpret_cast(dstMemPtr->GetData()); const size_t shift = isSymmetric ? 
1 : 0; const size_t endSrcShift = (params.srcDimsForReflectOrSymmetric[params.nDimsForWork] - params.srcODims[params.nDimsForWork]) * params.shift; diff --git a/src/plugins/intel_cpu/src/nodes/priorbox.cpp b/src/plugins/intel_cpu/src/nodes/priorbox.cpp index 7d58745f014f29..9e031d44d15b5c 100644 --- a/src/plugins/intel_cpu/src/nodes/priorbox.cpp +++ b/src/plugins/intel_cpu/src/nodes/priorbox.cpp @@ -33,7 +33,7 @@ class PriorBoxShapeInfer : public ShapeInferEmptyPads { Result infer( const std::vector>& input_shapes, const std::unordered_map& data_dependency) override { - const int* in_data = reinterpret_cast(data_dependency.at(0)->GetPtr()); + const int* in_data = reinterpret_cast(data_dependency.at(0)->GetData()); const int H = in_data[0]; const int W = in_data[1]; const auto output = static_cast(4 * H * W * m_number_of_priors); @@ -158,7 +158,7 @@ bool PriorBox::needShapeInfer() const { } const auto& outputShape = memory->GetShape().getStaticDims(); - const int* in_data = reinterpret_cast(memory->GetPtr()); + const int* in_data = reinterpret_cast(memory->GetData()); const int h = in_data[0]; const int w = in_data[1]; const auto output = static_cast(4 * h * w * number_of_priors); @@ -189,18 +189,18 @@ void PriorBox::createPrimitive() { } void PriorBox::execute(dnnl::stream strm) { - const int* in_data = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); + const int* in_data = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetData()); const int H = in_data[0]; const int W = in_data[1]; - const int* in_image = reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->GetPtr()); + const int* in_image = reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->GetData()); const int IH = in_image[0]; const int IW = in_image[1]; const int OH = 4 * H * W * number_of_priors; const int OW = 1; - float* dst_data = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); + float* dst_data = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetData()); float step_ = step; auto min_size_ = min_size; diff --git a/src/plugins/intel_cpu/src/nodes/priorbox_clustered.cpp b/src/plugins/intel_cpu/src/nodes/priorbox_clustered.cpp index 0e4ef85117e5ac..7d8f72b38249a5 100644 --- a/src/plugins/intel_cpu/src/nodes/priorbox_clustered.cpp +++ b/src/plugins/intel_cpu/src/nodes/priorbox_clustered.cpp @@ -32,7 +32,7 @@ class PriorBoxClusteredShapeInfer : public ShapeInferEmptyPads { Result infer( const std::vector>& input_shapes, const std::unordered_map& data_dependency) override { - const int* in_data = reinterpret_cast(data_dependency.at(0)->GetPtr()); + const int* in_data = reinterpret_cast(data_dependency.at(0)->GetData()); const int H = in_data[0]; const int W = in_data[1]; const auto output = static_cast(4 * H * W * m_number_of_priors); @@ -112,7 +112,7 @@ bool PriorBoxClustered::needShapeInfer() const { } const auto& outputShape = memory->GetShape().getStaticDims(); - const int* in_data = reinterpret_cast(memory->GetPtr()); + const int* in_data = reinterpret_cast(memory->GetData()); const int h = in_data[0]; const int w = in_data[1]; const auto output = static_cast(4 * h * w * number_of_priors); @@ -143,11 +143,11 @@ void PriorBoxClustered::createPrimitive() { } void PriorBoxClustered::execute(dnnl::stream strm) { - const int* in_data = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); + const int* in_data = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetData()); const int layer_height = in_data[0]; const int layer_width = in_data[1]; - const int* in_image = 
reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->GetPtr()); + const int* in_image = reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->GetData()); int img_height = in_image[0]; int img_width = in_image[1]; @@ -158,7 +158,7 @@ void PriorBoxClustered::execute(dnnl::stream strm) { step_h = static_cast(img_height) / layer_height; } - float* dst_data = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); + float* dst_data = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetData()); const auto& out_shape = getChildEdgeAt(0)->getMemory().GetShape().getStaticDims(); size_t var_size = variances.size(); diff --git a/src/plugins/intel_cpu/src/nodes/proposal.cpp b/src/plugins/intel_cpu/src/nodes/proposal.cpp index 6f713d5d8ae1a3..2f270043065c4b 100644 --- a/src/plugins/intel_cpu/src/nodes/proposal.cpp +++ b/src/plugins/intel_cpu/src/nodes/proposal.cpp @@ -164,13 +164,13 @@ void Proposal::executeDynamicImpl(dnnl::stream strm) { void Proposal::execute(dnnl::stream strm) { try { - const float* probabilitiesData = reinterpret_cast(getParentEdgeAt(PROBABILITIES_IN_IDX)->getMemoryPtr()->GetPtr()); - const float* anchorsData = reinterpret_cast(getParentEdgeAt(ANCHORS_IN_IDX)->getMemoryPtr()->GetPtr()); - const float* imgInfoData = reinterpret_cast(getParentEdgeAt(IMG_INFO_IN_IDX)->getMemoryPtr()->GetPtr()); - float* outRoiData = reinterpret_cast (getChildEdgesAtPort(ROI_OUT_IDX)[0]->getMemoryPtr()->GetPtr()); + const float* probabilitiesData = reinterpret_cast(getParentEdgeAt(PROBABILITIES_IN_IDX)->getMemoryPtr()->GetData()); + const float* anchorsData = reinterpret_cast(getParentEdgeAt(ANCHORS_IN_IDX)->getMemoryPtr()->GetData()); + const float* imgInfoData = reinterpret_cast(getParentEdgeAt(IMG_INFO_IN_IDX)->getMemoryPtr()->GetData()); + float* outRoiData = reinterpret_cast (getChildEdgesAtPort(ROI_OUT_IDX)[0]->getMemoryPtr()->GetData()); float* outProbData = nullptr; if (store_prob) - outProbData = reinterpret_cast (getChildEdgesAtPort(PROBABILITIES_OUT_IDX)[0]->getMemoryPtr()->GetPtr()); + outProbData = reinterpret_cast (getChildEdgesAtPort(PROBABILITIES_OUT_IDX)[0]->getMemoryPtr()->GetData()); auto inProbDims = getParentEdgeAt(0)->getMemory().getStaticDims(); const size_t imgInfoSize = getParentEdgeAt(2)->getMemory().getStaticDims()[0]; diff --git a/src/plugins/intel_cpu/src/nodes/psroi_pooling.cpp b/src/plugins/intel_cpu/src/nodes/psroi_pooling.cpp index ba96d219926f4c..940f49d7dcc31d 100644 --- a/src/plugins/intel_cpu/src/nodes/psroi_pooling.cpp +++ b/src/plugins/intel_cpu/src/nodes/psroi_pooling.cpp @@ -482,9 +482,9 @@ void PSROIPooling::executeBilinearDeformable(const inputType *srcData, outputTyp template void PSROIPooling::executeSpecified() { - const auto *srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); - const auto *bottomRoisBeginning = reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->GetPtr()); - auto *dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); + const auto *srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetData()); + const auto *bottomRoisBeginning = reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->GetData()); + auto *dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetData()); auto srcDesc = getParentEdgeAt(0)->getMemory().GetDescWithType(); auto dstDesc = getChildEdgeAt(0)->getMemory().GetDescWithType(); @@ -503,7 +503,7 @@ void PSROIPooling::executeSpecified() { int channelsEachClass = outputDim; if (!noTrans) { const auto mem = getParentEdgeAt(2)->getMemoryPtr(); - bottomTrans 
= reinterpret_cast(mem->GetPtr()); + bottomTrans = reinterpret_cast(mem->GetData()); numClasses = static_cast(mem->getStaticDims()[1]) / 2; channelsEachClass /= numClasses; } diff --git a/src/plugins/intel_cpu/src/nodes/range.cpp b/src/plugins/intel_cpu/src/nodes/range.cpp index 7fd0e2a74f7bbd..6082f4a336aa1c 100644 --- a/src/plugins/intel_cpu/src/nodes/range.cpp +++ b/src/plugins/intel_cpu/src/nodes/range.cpp @@ -118,9 +118,9 @@ size_t Range::getWorkAmount(data_t *startPtr, data_t *stopPtr, data_t *stepPtr) stopPtr = &limit; if (stepPtr == nullptr) stepPtr = δ - *startPtr = reinterpret_cast(getParentEdgeAt(RANGE_START)->getMemoryPtr()->GetPtr())[0]; - *stopPtr = reinterpret_cast(getParentEdgeAt(RANGE_LIMIT)->getMemoryPtr()->GetPtr())[0]; - *stepPtr = reinterpret_cast(getParentEdgeAt(RANGE_DELTA)->getMemoryPtr()->GetPtr())[0]; + *startPtr = reinterpret_cast(getParentEdgeAt(RANGE_START)->getMemoryPtr()->GetData())[0]; + *stopPtr = reinterpret_cast(getParentEdgeAt(RANGE_LIMIT)->getMemoryPtr()->GetData())[0]; + *stepPtr = reinterpret_cast(getParentEdgeAt(RANGE_DELTA)->getMemoryPtr()->GetData())[0]; const data_t span = *stopPtr - *startPtr; const data_t step = *stepPtr; if (std::is_same::value) { @@ -140,7 +140,7 @@ InferenceEngine::StatusCode Range::rangeKernel() { VectorDims newOutputShape {work_amount_dst}; redefineOutputMemory({newOutputShape}); } - data_t* dst_data = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); + data_t* dst_data = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetData()); parallel_nt(0, [&](const int ithr, const int nthr) { size_t iwork = 0, end = 0; splitter(work_amount_dst, nthr, ithr, iwork, end); diff --git a/src/plugins/intel_cpu/src/nodes/rdft.cpp b/src/plugins/intel_cpu/src/nodes/rdft.cpp index de8d493df71f5f..0eabc5505ac651 100644 --- a/src/plugins/intel_cpu/src/nodes/rdft.cpp +++ b/src/plugins/intel_cpu/src/nodes/rdft.cpp @@ -157,8 +157,8 @@ void RDFT::execute(dnnl::stream strm) { const auto& inputShape = inputMem.getStaticDims(); const auto& outputShape = outputMem.getStaticDims(); - auto inputPtr = reinterpret_cast(inputMem.GetPtr()); - auto outputPtr = reinterpret_cast(outputMem.GetPtr()); + auto inputPtr = reinterpret_cast(inputMem.GetData()); + auto outputPtr = reinterpret_cast(outputMem.GetData()); auto rank = inputShape.size() - inverse; @@ -187,7 +187,7 @@ void RDFT::prepareParams() { if (axes.size() != newAxesSize) { axes.resize(newAxesSize); } - auto axesPtr = reinterpret_cast(axesMem->GetPtr()); + auto axesPtr = reinterpret_cast(axesMem->GetData()); auto inputRank = inputShapes[DATA_INDEX].getRank() - inverse; for (size_t i = 0; i < axes.size(); i++) { axes[i] = axesPtr[i] < 0 ? axesPtr[i] + inputRank : axesPtr[i]; @@ -213,7 +213,7 @@ void RDFT::prepareParams() { if (signalSizes.size() != newSize) { signalSizes.resize(newSize); } - const auto& signalSizesPtr = reinterpret_cast(signalSizesMem->GetPtr()); + const auto& signalSizesPtr = reinterpret_cast(signalSizesMem->GetData()); for (size_t i = 0; i < newSize; i++) { signalSizes[i] = signalSizesPtr[i]; } @@ -232,7 +232,7 @@ bool RDFT::axesChanged() const { if (axes.size() != axesMem->getStaticDims()[0]) { return true; } - auto axesPtr = reinterpret_cast(axesMem->GetPtr()); + auto axesPtr = reinterpret_cast(axesMem->GetData()); auto inputRank = inputShapes[DATA_INDEX].getRank() - inverse; for (size_t i = 0; i < axes.size(); i++) { auto newAxis = axesPtr[i] < 0 ? 
axesPtr[i] + inputRank : axesPtr[i]; @@ -267,7 +267,7 @@ bool RDFT::signalSizesChanged() const { if (signalSizes.size() != newSize || signalSizes.size() != axes.size()) { return true; } - const auto& signalSizesPtr = reinterpret_cast(signalSizesMem->GetPtr()); + const auto& signalSizesPtr = reinterpret_cast(signalSizesMem->GetData()); for (size_t i = 0; i < newSize; i++) { if (signalSizesPtr[i] != signalSizes[i]) { return true; diff --git a/src/plugins/intel_cpu/src/nodes/reduce.cpp b/src/plugins/intel_cpu/src/nodes/reduce.cpp index 589a5ec19f9d5b..78c56b06a493ec 100644 --- a/src/plugins/intel_cpu/src/nodes/reduce.cpp +++ b/src/plugins/intel_cpu/src/nodes/reduce.cpp @@ -2080,8 +2080,8 @@ void Reduce::execute(dnnl::stream strm) { auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); auto srcMemPtr = getParentEdgeAt(REDUCE_DATA)->getMemoryPtr(); - const uint8_t *src_data = reinterpret_cast(srcMemPtr->GetPtr()); - uint8_t *dst_data = reinterpret_cast(dstMemPtr->GetPtr()); + const uint8_t *src_data = reinterpret_cast(srcMemPtr->GetData()); + uint8_t *dst_data = reinterpret_cast(dstMemPtr->GetData()); if (jit_mode) { if (is_hybrid_layout) { @@ -2125,7 +2125,7 @@ void Reduce::reduce_type(const uint8_t *in_ptr, uint8_t *out_ptr, size_t dst_siz if (is_hybrid_layout) { uint8_t *proc_ptr = out_ptr; auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); - out_ptr = reinterpret_cast(dstMemPtr->GetPtr()); + out_ptr = reinterpret_cast(dstMemPtr->GetData()); if (layout == ReduceLayoutType::reduce_nspc) { nspc2ncsp(proc_ptr, out_ptr); } else { diff --git a/src/plugins/intel_cpu/src/nodes/reference.cpp b/src/plugins/intel_cpu/src/nodes/reference.cpp index 2356a56ae37028..2c6c9928357c3a 100644 --- a/src/plugins/intel_cpu/src/nodes/reference.cpp +++ b/src/plugins/intel_cpu/src/nodes/reference.cpp @@ -122,7 +122,7 @@ bool Reference::needShapeInfer() const { ov::TensorVector Reference::prepareInputs() const { ov::TensorVector inputs; for (size_t i = 0; i < inputShapes.size(); i++) { - void *srcDataPtr = getParentEdgesAtPort(i)[0]->getMemory().GetPtr(); + void *srcDataPtr = getParentEdgesAtPort(i)[0]->getMemory().GetData(); inputs.push_back(ov::Tensor(ngraphOp->get_input_element_type(i), getParentEdgesAtPort(i)[0]->getMemory().getStaticDims(), srcDataPtr)); } @@ -132,7 +132,7 @@ ov::TensorVector Reference::prepareInputs() const { ov::TensorVector Reference::prepareOutputs() const { ov::TensorVector outputs; for (size_t i = 0; i < outputShapes.size(); i++) { - void *dstDataPtr = getChildEdgesAtPort(i)[0]->getMemory().GetPtr(); + void *dstDataPtr = getChildEdgesAtPort(i)[0]->getMemory().GetData(); outputs.push_back(ov::Tensor(ngraphOp->get_output_element_type(i), getChildEdgesAtPort(i)[0]->getMemory().getStaticDims(), dstDataPtr)); } diff --git a/src/plugins/intel_cpu/src/nodes/region_yolo.cpp b/src/plugins/intel_cpu/src/nodes/region_yolo.cpp index 942d9eec06bb2c..2e7a45b2d12435 100644 --- a/src/plugins/intel_cpu/src/nodes/region_yolo.cpp +++ b/src/plugins/intel_cpu/src/nodes/region_yolo.cpp @@ -410,8 +410,8 @@ void RegionYolo::execute(dnnl::stream strm) { size_t inputs_size = IH * IW * num_ * (classes + coords + 1); size_t total_size = 2 * IH * IW; - const auto *src_data = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); - auto *dst_data = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); + const auto *src_data = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetData()); + auto *dst_data = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetData()); cpu_convert(src_data, dst_data, 
getParentEdgeAt(0)->getMemory().getDesc().getPrecision(), getChildEdgeAt(0)->getMemory().getDesc().getPrecision(), output_size); diff --git a/src/plugins/intel_cpu/src/nodes/reorder.cpp b/src/plugins/intel_cpu/src/nodes/reorder.cpp index cc0a2d884b9b78..ca2ee7fad36b3c 100644 --- a/src/plugins/intel_cpu/src/nodes/reorder.cpp +++ b/src/plugins/intel_cpu/src/nodes/reorder.cpp @@ -190,11 +190,9 @@ void Reorder::createReorderPrimitive(const dnnl::memory::desc& srcDesc, IE_THROW() << "Preferable primitive descriptor is not set."; const auto engine = getEngine(); - src_blocked = std::make_shared(engine); - src_blocked->Create(DnnlExtensionUtils::makeDescriptor(srcDesc), srcPtr, false); + src_blocked = std::make_shared(engine, DnnlExtensionUtils::makeDescriptor(srcDesc), srcPtr, false); - dst_blocked = std::make_shared(engine); - dst_blocked->Create(DnnlExtensionUtils::makeDescriptor(dstDesc), dstPtr, false); + dst_blocked = std::make_shared(engine, DnnlExtensionUtils::makeDescriptor(dstDesc), dstPtr, false); auto src_desc = src_blocked->GetPrimitive().get_desc(); if (!src_permutation.empty()) { @@ -227,7 +225,7 @@ void Reorder::createReorderPrimitive(const dnnl::memory::desc& srcDesc, auto newDesc = dnnl::memory::desc(DnnlExtensionUtils::convertToDnnlDims(newDims), src_blocked->GetDataType(), newFormat); - src_blocked->Create(DnnlExtensionUtils::makeDescriptor(newDesc), srcPtr, false); + src_blocked->redefineDesc(DnnlExtensionUtils::makeDescriptor(newDesc), srcPtr, false); src_desc = src_blocked->GetPrimitive().get_desc(); } @@ -275,8 +273,8 @@ void Reorder::optimizedNcsp2Nspc() { const size_t DIM3 = inDims[ndims - 2]; const size_t DIM4 = inDims[ndims - 1]; - auto src_data = reinterpret_cast(parentEdge->getMemoryPtr()->GetPtr()); - auto dst_data = reinterpret_cast(childEdge->getMemoryPtr()->GetPtr()); + auto src_data = reinterpret_cast(parentEdge->getMemoryPtr()->GetData()); + auto dst_data = reinterpret_cast(childEdge->getMemoryPtr()->GetData()); const size_t src_batch_stride = DIM1 * DIM2 * DIM3 * DIM4; const size_t dst_batch_stride = dstStrides[0]; @@ -308,8 +306,8 @@ void Reorder::optimizedNspc2Ncsp() { const size_t DIM3 = inDims[ndims - 2]; const size_t DIM4 = inDims[ndims - 1]; - auto src_data = reinterpret_cast(parentEdge->getMemoryPtr()->GetPtr()); - auto dst_data = reinterpret_cast(childEdge->getMemoryPtr()->GetPtr()); + auto src_data = reinterpret_cast(parentEdge->getMemoryPtr()->GetData()); + auto dst_data = reinterpret_cast(childEdge->getMemoryPtr()->GetData()); const auto dstStrides = childEdge->getMemoryPtr()->GetDescWithType()->getStrides(); const size_t block_size = DIM2 * DIM3 * DIM4; @@ -339,8 +337,8 @@ void Reorder::execute(dnnl::stream strm) { } else if (canUseNcsp2Nspc) { optimizedNcsp2Nspc(); } else { - src_blocked->setDataHandle(getParentEdgeAt(0)->getMemory().GetData()); - dst_blocked->setDataHandle(getChildEdgeAt(0)->getMemory().GetData()); + // src_blocked->setDataHandle(getParentEdgeAt(0)->getMemory().GetData()); + // dst_blocked->setDataHandle(getChildEdgeAt(0)->getMemory().GetData()); if (prim) { prim.execute(strm, primArgs); @@ -365,7 +363,7 @@ std::string Reorder::getReorderArgs(const MemoryDesc &parentDesc, const MemoryDe return inArgs + "_" + outArgs; } -void Reorder::reorderData(const Memory &input, const Memory &output, MultiCachePtr cache) { +void Reorder::reorderData(const IMemory &input, const IMemory &output, MultiCachePtr cache) { if (!input.getDesc().isDefined() || !output.getDesc().isDefined()) IE_THROW() << "Can't reorder data with dynamic shapes"; @@ 
-374,8 +372,8 @@ void Reorder::reorderData(const Memory &input, const Memory &output, MultiCacheP } if (input.getDesc().isCompatible(output.getDesc())) { - auto srcPtr = static_cast(input.GetPtr()); - auto dstPtr = static_cast(output.GetPtr()); + auto srcPtr = static_cast(input.GetData()); + auto dstPtr = static_cast(output.GetData()); auto copySize = output.GetSize(); cpu_memcpy(dstPtr, srcPtr, copySize); @@ -385,7 +383,7 @@ void Reorder::reorderData(const Memory &input, const Memory &output, MultiCacheP auto srcMemory = input.GetPrimitive(); auto dstMemory = output.GetPrimitive(); - auto engine = output.getEngine(); + auto engine = dstMemory.get_engine(); // try directly reorder reorder = getReorderPrim(cache, dstMemory.get_engine(), srcMemory.get_desc(), dstMemory.get_desc()); if (!reorder) { @@ -394,16 +392,15 @@ void Reorder::reorderData(const Memory &input, const Memory &output, MultiCacheP Convert::isSupportedDesc(output.getDesc())) { //we probably could not make the reorder because there is no one supporting this precision conversion //lets try to convert data first using cpu_convert - auto data = static_cast(input.GetPtr()); + auto data = static_cast(input.GetData()); tmpBuff.resize(input.GetSize()); const auto outPrc = DnnlExtensionUtils::DataTypeToIEPrecision(output.GetDataType()); cpu_convert(data, tmpBuff.data(), DnnlExtensionUtils::DataTypeToIEPrecision(input.GetDataType()), outPrc, input.GetSize() / input.getDesc().getPrecision().size()); - Memory tmpMem(engine); auto tmpDesc = input.getDesc().cloneWithNewPrecision(outPrc); - tmpMem.Create(std::move(tmpDesc), tmpBuff.data()); + Memory tmpMem(engine, std::move(tmpDesc), tmpBuff.data()); srcMemory = tmpMem.GetPrimitive(); reorder = getReorderPrim(cache, dstMemory.get_engine(), srcMemory.get_desc(), dstMemory.get_desc()); diff --git a/src/plugins/intel_cpu/src/nodes/reorder.h b/src/plugins/intel_cpu/src/nodes/reorder.h index 4b105ad46c1651..2b42ca518ff9a2 100644 --- a/src/plugins/intel_cpu/src/nodes/reorder.h +++ b/src/plugins/intel_cpu/src/nodes/reorder.h @@ -61,7 +61,7 @@ class Reorder : public Node { static std::string getReorderArgs(const MemoryDesc &parentDesc, const MemoryDesc &childDesc); - static void reorderData(const Memory &input, const Memory &output, MultiCachePtr cache = nullptr); + static void reorderData(const IMemory &input, const IMemory &output, MultiCachePtr cache = nullptr); private: dnnl::reorder::primitive prim; diff --git a/src/plugins/intel_cpu/src/nodes/reorg_yolo.cpp b/src/plugins/intel_cpu/src/nodes/reorg_yolo.cpp index c331d7b0adb64c..be0e6564cf424b 100644 --- a/src/plugins/intel_cpu/src/nodes/reorg_yolo.cpp +++ b/src/plugins/intel_cpu/src/nodes/reorg_yolo.cpp @@ -59,8 +59,8 @@ void ReorgYolo::executeDynamicImpl(dnnl::stream strm) { } void ReorgYolo::execute(dnnl::stream strm) { - const auto *src_data = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); - auto *dst_data = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); + const auto *src_data = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetData()); + auto *dst_data = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetData()); const auto &inDims = getParentEdgeAt(0)->getMemory().getStaticDims(); int IW = (inDims.size() > 3) ? 
inDims[3] : 1; diff --git a/src/plugins/intel_cpu/src/nodes/reshape.cpp b/src/plugins/intel_cpu/src/nodes/reshape.cpp index 158d594bfd9233..2d5c75b553588d 100644 --- a/src/plugins/intel_cpu/src/nodes/reshape.cpp +++ b/src/plugins/intel_cpu/src/nodes/reshape.cpp @@ -46,7 +46,7 @@ class ReshapeShapeInfer : public ShapeInferEmptyPads { const auto& inputShape = input_shapes[RESHAPE_SRC].get(); const size_t inputShapeSize = inputShape.size(); const auto memPtr = data_dependency.at(RESHAPE_PATTERN); - const auto data = memPtr->GetPtr(); + const auto data = memPtr->GetData(); const auto& dims = memPtr->getStaticDims(); const auto outputPatternSize = std::accumulate(dims.begin(), dims.end(), 1, std::multiplies()); std::vector outPattern = ov::get_raw_data_as( @@ -109,7 +109,7 @@ class SqueezeShapeInfer : public ShapeInferEmptyPads { outputShape.reserve(inputShapeSize); if (itr != data_dependency.end()) { const auto memPtr = data_dependency.at(SQUEEZE_PATTERN); - const auto data = memPtr->GetPtr(); + const auto data = memPtr->GetData(); const auto& dims = memPtr->getStaticDims(); const auto outputPatternSize = std::accumulate(dims.begin(), dims.end(), 1, std::multiplies()); std::vector outPattern = ov::get_raw_data_as( @@ -164,7 +164,7 @@ class UnsqueezeShapeInfer : public ShapeInferEmptyPads { const auto& inputShape = input_shapes[UNSQUEEZE_SRC].get(); const size_t inputShapeSize = inputShape.size(); const auto memPtr = data_dependency.at(UNSQUEEZE_PATTERN); - const auto data = memPtr->GetPtr(); + const auto data = memPtr->GetData(); const auto& dims = memPtr->getStaticDims(); const auto outputPatternSize = std::accumulate(dims.begin(), dims.end(), 1, std::multiplies()); std::vector outPattern = ov::get_raw_data_as( @@ -264,7 +264,7 @@ bool Reshape::needShapeInfer() const { if (lastSecondInputValues.empty()) { lastSecondInputValues.resize(mem.getStaticDims()[0], 0); } - const int32_t *sndInput = reinterpret_cast(mem.GetPtr()); + const int32_t *sndInput = reinterpret_cast(mem.GetData()); for (size_t i = 0; i < lastSecondInputValues.size(); i++) { if (lastSecondInputValues[i] != sndInput[i]) { for (size_t i = 0; i < lastSecondInputValues.size(); i++) { @@ -325,8 +325,8 @@ void Reshape::execute(dnnl::stream strm) { auto srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); auto dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); - auto srcPtr = static_cast(srcMemPtr->GetPtr()); - auto dstPtr = static_cast(dstMemPtr->GetPtr()); + auto srcPtr = static_cast(srcMemPtr->GetData()); + auto dstPtr = static_cast(dstMemPtr->GetData()); if (dstPtr != srcPtr) { cpu_memcpy(dstPtr, srcPtr, dstMemPtr->GetSize()); diff --git a/src/plugins/intel_cpu/src/nodes/reverse_sequence.cpp b/src/plugins/intel_cpu/src/nodes/reverse_sequence.cpp index b17841865be2d7..b8c2501e8fce17 100644 --- a/src/plugins/intel_cpu/src/nodes/reverse_sequence.cpp +++ b/src/plugins/intel_cpu/src/nodes/reverse_sequence.cpp @@ -129,9 +129,9 @@ ReverseSequence::ReverseSequenceExecutor::ReverseSequenceExecutor(const VectorDi template void ReverseSequence::ReverseSequenceExecutor::exec(const MemoryPtr& dataMemPtr, const MemoryPtr& seqLengthsMemPtr, const MemoryPtr& dstMemPtr) { const VectorDims& srcDims = dataMemPtr->getStaticDims(); - const auto *srcData = reinterpret_cast(dataMemPtr->GetPtr()); - auto *dstData = reinterpret_cast(dstMemPtr->GetPtr()); - auto *seqLengthsData = reinterpret_cast(seqLengthsMemPtr->GetPtr()); + const auto *srcData = reinterpret_cast(dataMemPtr->GetData()); + auto *dstData = reinterpret_cast(dstMemPtr->GetData()); + auto 
*seqLengthsData = reinterpret_cast(seqLengthsMemPtr->GetData()); for (size_t i = 0; i < srcDims[batchAxis]; ++i) { if (static_cast(seqLengthsData[i]) > static_cast(srcDims[seqAxis])) { diff --git a/src/plugins/intel_cpu/src/nodes/rnn.cpp b/src/plugins/intel_cpu/src/nodes/rnn.cpp index 7b7f9980c2e6ca..32c4dbadd4fbd1 100644 --- a/src/plugins/intel_cpu/src/nodes/rnn.cpp +++ b/src/plugins/intel_cpu/src/nodes/rnn.cpp @@ -712,8 +712,8 @@ void RNN::fillWeights(const int *gate_map, const size_t wIdx, const size_t rIdx) auto ie_w_ptr = ie_w_vec.data(); auto ie_r_ptr = ie_r_vec.data(); - cpu_convert(wConstBlob->GetPtr(), ie_w_ptr, weightPrec, targetWeightPrec, ie_w_vec_size); - cpu_convert(rConstBlob->GetPtr(), ie_r_ptr, weightPrec, targetWeightPrec, ie_r_vec_size); + cpu_convert(wConstBlob->GetData(), ie_w_ptr, weightPrec, targetWeightPrec, ie_w_vec_size); + cpu_convert(rConstBlob->GetData(), ie_r_ptr, weightPrec, targetWeightPrec, ie_r_vec_size); const int step = SC * G; @@ -760,7 +760,7 @@ void RNN::fillBiases(const int *gate_map) { auto const elementsCount = constBlob->GetSize() / constBlob->getDesc().getPrecision().size(); std::vector ie_b_vec(elementsCount); - cpu_convert(constBlob->GetPtr(), + cpu_convert(constBlob->GetData(), &ie_b_vec[0], DnnlExtensionUtils::DataTypeToIEPrecision(constBlob->GetDataType()), Prec, diff --git a/src/plugins/intel_cpu/src/nodes/roi_align.cpp b/src/plugins/intel_cpu/src/nodes/roi_align.cpp index 33236a7815a89a..bb4ffc76a73817 100644 --- a/src/plugins/intel_cpu/src/nodes/roi_align.cpp +++ b/src/plugins/intel_cpu/src/nodes/roi_align.cpp @@ -876,10 +876,10 @@ void ROIAlign::executeSpecified() { auto isPlainFmt = srcBlockDesc->hasLayoutType(LayoutType::ncsp); - const auto *srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); - const auto *srcRoi = reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->GetPtr()); - const auto *srcRoiIdx = reinterpret_cast(getParentEdgeAt(2)->getMemoryPtr()->GetPtr()); - auto *dst = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); + const auto *srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetData()); + const auto *srcRoi = reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->GetData()); + const auto *srcRoiIdx = reinterpret_cast(getParentEdgeAt(2)->getMemoryPtr()->GetData()); + auto *dst = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetData()); auto nominalRoiCount = static_cast(srcMemory1.getStaticDims()[0]); int realRois = 0; diff --git a/src/plugins/intel_cpu/src/nodes/roi_pooling.cpp b/src/plugins/intel_cpu/src/nodes/roi_pooling.cpp index b082635f2a3b4d..f7e408b046b046 100644 --- a/src/plugins/intel_cpu/src/nodes/roi_pooling.cpp +++ b/src/plugins/intel_cpu/src/nodes/roi_pooling.cpp @@ -551,18 +551,18 @@ class ROIPooling::ROIPoolingJitExecutor : public ROIPooling::ROIPoolingExecutor } void exec( - const Memory& srcData, - const Memory& srcRoi, - const Memory& dst) override { + const IMemory& srcData, + const IMemory& srcRoi, + const IMemory& dst) override { if (!roi_pooling_kernel) IE_THROW() << "Could not execute. 
Kernel for RoiPooling node was not compiled."; auto src_strides = srcData.GetDescWithType()->getStrides(); auto src_roi_step = srcRoi.GetDescWithType()->getStrides()[0]; auto dst_strides = dst.GetDescWithType()->getStrides(); - const auto* src_ptr = reinterpret_cast(srcData.GetPtr()); - const auto* roi_ptr = reinterpret_cast(srcRoi.GetPtr()); - auto* dst_ptr = reinterpret_cast(dst.GetPtr()); + const auto* src_ptr = reinterpret_cast(srcData.GetData()); + const auto* roi_ptr = reinterpret_cast(srcRoi.GetData()); + auto* dst_ptr = reinterpret_cast(dst.GetData()); executeOptimizedGeneric(src_ptr, roi_ptr, dst_ptr, src_strides, dst_strides, src_roi_step); } @@ -674,15 +674,15 @@ class ROIPooling::ROIPoolingRefExecutor : public ROIPooling::ROIPoolingExecutor public: ROIPoolingRefExecutor(const jit_roi_pooling_params &_jpp) : jpp(_jpp) {} void exec( - const Memory& srcData, - const Memory& srcRoi, - const Memory& dst) override { + const IMemory& srcData, + const IMemory& srcRoi, + const IMemory& dst) override { auto src_strides = srcData.GetDescWithType()->getStrides(); auto src_roi_step = srcRoi.GetDescWithType()->getStrides()[0]; auto dst_strides = dst.GetDescWithType()->getStrides(); - const auto* src_ptr = reinterpret_cast(srcData.GetPtr()); - const auto* roi_ptr = reinterpret_cast(srcRoi.GetPtr()); - auto* dst_ptr = reinterpret_cast(dst.GetPtr()); + const auto* src_ptr = reinterpret_cast(srcData.GetData()); + const auto* roi_ptr = reinterpret_cast(srcRoi.GetData()); + auto* dst_ptr = reinterpret_cast(dst.GetData()); executeReference(src_ptr, roi_ptr, dst_ptr, src_strides, dst_strides, src_roi_step); } diff --git a/src/plugins/intel_cpu/src/nodes/roi_pooling.h b/src/plugins/intel_cpu/src/nodes/roi_pooling.h index 5135bdc3db929f..ee3d3b9852ddde 100644 --- a/src/plugins/intel_cpu/src/nodes/roi_pooling.h +++ b/src/plugins/intel_cpu/src/nodes/roi_pooling.h @@ -93,9 +93,9 @@ class ROIPooling : public Node { public: ROIPoolingExecutor() = default; virtual void exec( - const ov::intel_cpu::Memory& srcData, - const ov::intel_cpu::Memory& srcRoi, - const ov::intel_cpu::Memory& dst) = 0; + const ov::intel_cpu::IMemory& srcData, + const ov::intel_cpu::IMemory& srcRoi, + const ov::intel_cpu::IMemory& dst) = 0; virtual ~ROIPoolingExecutor() = default; static std::shared_ptr createROIPoolingNewExecutor(const jit_roi_pooling_params& jpp); diff --git a/src/plugins/intel_cpu/src/nodes/roll.cpp b/src/plugins/intel_cpu/src/nodes/roll.cpp index 4e6f2c8d051d52..8cf7ea56194868 100644 --- a/src/plugins/intel_cpu/src/nodes/roll.cpp +++ b/src/plugins/intel_cpu/src/nodes/roll.cpp @@ -178,10 +178,10 @@ Roll::RollExecutor::RollExecutor(const VectorDims& dataDims, const VectorDims& s template void Roll::RollExecutor::exec(const MemoryPtr& dataMemPtr, const MemoryPtr& shiftMemPtr, const MemoryPtr& axesMemPtr, const MemoryPtr& dstMemPtr) { - const auto *data = reinterpret_cast(dataMemPtr->GetPtr()); - const auto *shift = reinterpret_cast(shiftMemPtr->GetPtr()); - const auto *axes = reinterpret_cast(axesMemPtr->GetPtr()); - auto *dst = reinterpret_cast(dstMemPtr->GetPtr()); + const auto *data = reinterpret_cast(dataMemPtr->GetData()); + const auto *shift = reinterpret_cast(shiftMemPtr->GetData()); + const auto *axes = reinterpret_cast(axesMemPtr->GetData()); + auto *dst = reinterpret_cast(dstMemPtr->GetData()); std::vector shiftsVector(numOfDims, 0ul); const VectorDims& dataDims = dataMemPtr->getStaticDims(); diff --git a/src/plugins/intel_cpu/src/nodes/scatter_update.cpp b/src/plugins/intel_cpu/src/nodes/scatter_update.cpp 
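// Note on the preceding hunks: helper entry points that used to take the concrete Memory
// class now take the IMemory interface (Reorder::reorderData, the ROIPooling executors),
// and temporary Memory objects are created in a single step through the new constructor
// (e.g. Memory tmpMem(engine, std::move(tmpDesc), tmpBuff.data()) in Reorder) rather than
// default construction followed by Create(). A minimal caller sketch (illustrative only;
// it assumes MemoryPtr dereferences to an IMemory-compatible object):
//
//     MemoryPtr src = getParentEdgeAt(0)->getMemoryPtr();
//     MemoryPtr roi = getParentEdgeAt(1)->getMemoryPtr();
//     MemoryPtr dst = getChildEdgeAt(0)->getMemoryPtr();
//     execPtr->exec(*src, *roi, *dst);   // executors depend only on the IMemory interface
//
// Programming against the interface allows other IMemory implementations to be passed
// through the same executor code without further signature changes.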
index 6198c21eee717f..e73d949b35b402 100644 --- a/src/plugins/intel_cpu/src/nodes/scatter_update.cpp +++ b/src/plugins/intel_cpu/src/nodes/scatter_update.cpp @@ -259,10 +259,10 @@ void ScatterUpdate::execute(dnnl::stream strm) { auto indicesMemPtr = getParentEdgeAt(INDICES_ID)->getMemoryPtr(); auto updateMemPtr = getParentEdgeAt(UPDATE_ID)->getMemoryPtr(); - uint8_t *dstPtr = reinterpret_cast(dstMemPtr->GetPtr()); - uint8_t *srcPtr = reinterpret_cast(srcMemPtr->GetPtr()); - uint8_t *indicesPtr = reinterpret_cast(indicesMemPtr->GetPtr()); - uint8_t *updatePtr = reinterpret_cast(updateMemPtr->GetPtr()); + uint8_t *dstPtr = reinterpret_cast(dstMemPtr->GetData()); + uint8_t *srcPtr = reinterpret_cast(srcMemPtr->GetData()); + uint8_t *indicesPtr = reinterpret_cast(indicesMemPtr->GetData()); + uint8_t *updatePtr = reinterpret_cast(updateMemPtr->GetData()); const auto& srcDataDim = getParentEdgeAt(DATA_ID)->getMemory().getStaticDims(); const auto& indicesDim = getParentEdgeAt(INDICES_ID)->getMemory().getStaticDims(); @@ -270,7 +270,7 @@ void ScatterUpdate::execute(dnnl::stream strm) { int axis = 0; if (axisRelaxed) { auto axisMemPtr = getParentEdgeAt(AXIS_ID)->getMemoryPtr(); - uint8_t *axisPtr = reinterpret_cast(axisMemPtr->GetPtr()); + uint8_t *axisPtr = reinterpret_cast(axisMemPtr->GetData()); if (axisSize == 4) { auto *axisPtr32 = reinterpret_cast(axisPtr); axis = *axisPtr32; diff --git a/src/plugins/intel_cpu/src/nodes/shapeof.cpp b/src/plugins/intel_cpu/src/nodes/shapeof.cpp index 601b83c7b0649b..44ab4a4ef8735f 100644 --- a/src/plugins/intel_cpu/src/nodes/shapeof.cpp +++ b/src/plugins/intel_cpu/src/nodes/shapeof.cpp @@ -102,7 +102,7 @@ void ShapeOf::execute(dnnl::stream strm) { if (outPtr->getStaticDims().size() != 1 || dimsCount != outPtr->getStaticDims()[0]) IE_THROW() << errorPrefix << "has inconsistent input shape and output size"; - auto *dst = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); + auto *dst = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetData()); for (size_t i = 0; i < dimsCount; i++) { dst[i] = inDims[i]; diff --git a/src/plugins/intel_cpu/src/nodes/shuffle_channels.cpp b/src/plugins/intel_cpu/src/nodes/shuffle_channels.cpp index ae01c05a16ccef..437912d441e910 100644 --- a/src/plugins/intel_cpu/src/nodes/shuffle_channels.cpp +++ b/src/plugins/intel_cpu/src/nodes/shuffle_channels.cpp @@ -294,8 +294,8 @@ void ShuffleChannels::execute(dnnl::stream strm) { int MB = (attrs.axis != 0) ? 
getParentEdgeAt(0)->getMemoryPtr()->getStaticDims()[0] : -1; - const uint8_t* srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); - uint8_t* dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); + const uint8_t* srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetData()); + uint8_t* dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetData()); execPtr->exec(srcData, dstData, MB); } diff --git a/src/plugins/intel_cpu/src/nodes/space_to_batch.cpp b/src/plugins/intel_cpu/src/nodes/space_to_batch.cpp index 116d7ae61f6742..33727f7e5e665e 100644 --- a/src/plugins/intel_cpu/src/nodes/space_to_batch.cpp +++ b/src/plugins/intel_cpu/src/nodes/space_to_batch.cpp @@ -101,21 +101,21 @@ void SpaceToBatch::SpaceToBatchKernel() { const auto& srcMem = getParentEdgesAtPort(0)[0]->getMemoryPtr(); const auto& dstMem = getChildEdgesAtPort(0)[0]->getMemoryPtr(); - const auto *blockShapesPtr = reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->GetPtr()); + const auto *blockShapesPtr = reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->GetData()); size_t dataRank = srcMem->GetShape().getRank(); blockShapeIn.clear(); for (size_t i = 0; i < dataRank; i++) { blockShapeIn.push_back(*(blockShapesPtr + i)); } - const auto *padsBeginPtr = reinterpret_cast(getParentEdgeAt(2)->getMemoryPtr()->GetPtr()); + const auto *padsBeginPtr = reinterpret_cast(getParentEdgeAt(2)->getMemoryPtr()->GetData()); padsBeginIn.clear(); for (size_t i = 0; i < dataRank; i++) { padsBeginIn.push_back(*(padsBeginPtr + i)); } - const auto *srcData = reinterpret_cast(srcMem->GetPtr()); - auto *dstData = reinterpret_cast(dstMem->GetPtr()); + const auto *srcData = reinterpret_cast(srcMem->GetData()); + auto *dstData = reinterpret_cast(dstMem->GetData()); const int64_t srcLen = srcMem->GetSize() / sizeof(T); const int64_t dstLen = dstMem->GetSize() / sizeof(T); diff --git a/src/plugins/intel_cpu/src/nodes/space_to_depth.cpp b/src/plugins/intel_cpu/src/nodes/space_to_depth.cpp index f97ef8bf06c59d..1a27b9afa175a1 100644 --- a/src/plugins/intel_cpu/src/nodes/space_to_depth.cpp +++ b/src/plugins/intel_cpu/src/nodes/space_to_depth.cpp @@ -313,8 +313,8 @@ void SpaceToDepth::execute(dnnl::stream strm) { if (!execPtr) { THROW_ERROR << "doesn't have a compiled executor."; } - const uint8_t* srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); - uint8_t* dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); + const uint8_t* srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetData()); + uint8_t* dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetData()); const int MB = getParentEdgeAt(0)->getMemoryPtr()->getStaticDims()[0]; execPtr->exec(srcData, dstData, MB); } diff --git a/src/plugins/intel_cpu/src/nodes/split.cpp b/src/plugins/intel_cpu/src/nodes/split.cpp index bb26d696133180..f34ef95b0d89bd 100644 --- a/src/plugins/intel_cpu/src/nodes/split.cpp +++ b/src/plugins/intel_cpu/src/nodes/split.cpp @@ -212,7 +212,7 @@ bool Split::needShapeInfer() const { if (curLengthsSize != splitLengths.size()) { return true; } - const int* curLengthsValues = reinterpret_cast(lengthsMemPtr->GetPtr()); + const int* curLengthsValues = reinterpret_cast(lengthsMemPtr->GetData()); for (size_t i = 0; i < curLengthsSize; ++i) { if (curLengthsValues[i] != splitLengths[i]) { return true; @@ -237,7 +237,7 @@ void Split::prepareParams() { if (!constSplitLengths) { const auto& splitLengthsPtr = getParentEdgeAt(2)->getMemoryPtr(); - const int* 
curSplitLengths = reinterpret_cast(splitLengthsPtr->GetPtr()); + const int* curSplitLengths = reinterpret_cast(splitLengthsPtr->GetData()); const auto curLengthsSize = splitLengthsPtr->getStaticDims()[0]; splitLengths.assign(curSplitLengths, curSplitLengths + curLengthsSize); } @@ -286,7 +286,7 @@ void Split::execute(dnnl::stream strm) { return; } - uint8_t* srcData = reinterpret_cast(srcMem.GetPtr()); + uint8_t* srcData = reinterpret_cast(srcMem.GetData()); IE_ASSERT(execPtr != nullptr); execPtr->exec(srcData, getRawDstMemPtrs()); } @@ -429,7 +429,7 @@ void Split::optimizedNspc2Ncsp(size_t MB) { const size_t strideOC = DHW * dataSize; for (size_t i = 0, sIdx = 0; i < dstMemPtrs.size(); i++) { - auto dstData = reinterpret_cast(dstMemPtrs[i].second->GetPtr()); + auto dstData = reinterpret_cast(dstMemPtrs[i].second->GetData()); size_t innerSize = 1; auto dims = getChildEdgesAtPort(dstMemPtrs[i].first)[0]->getMemory().getStaticDims(); @@ -459,7 +459,7 @@ void Split::optimizedNspc2Ncsp(size_t MB) { std::vector Split::getRawDstMemPtrs() const { std::vector result(dstMemPtrs.size()); for (size_t i = 0; i < dstMemPtrs.size(); ++i) { - result[i] = reinterpret_cast(dstMemPtrs[i].second->GetPtr()); + result[i] = reinterpret_cast(dstMemPtrs[i].second->GetData()); if (!result[i]) { THROW_ERROR << "can't get child edge indx " << dstMemPtrs[i].first << " data."; } @@ -543,8 +543,7 @@ void Split::resolveInPlaceEdges(Edge::LOOK look) { // getName() << " with type " << getTypeStr(); auto memMngr = std::make_shared(baseMemMngr, baseDim, offset, partDim); - auto newMem = std::make_shared(getEngine()); - newMem->Create(selected_pd->getConfig().outConfs[i].getMemDesc(), memMngr); + auto newMem = std::make_shared(getEngine(), std::unique_ptr(memMngr.get()), selected_pd->getConfig().outConfs[i].getMemDesc()); childEdge->resetMemoryPtr(newMem); } diff --git a/src/plugins/intel_cpu/src/nodes/strided_slice.cpp b/src/plugins/intel_cpu/src/nodes/strided_slice.cpp index 810db4247640a7..c5db3665c58398 100644 --- a/src/plugins/intel_cpu/src/nodes/strided_slice.cpp +++ b/src/plugins/intel_cpu/src/nodes/strided_slice.cpp @@ -63,9 +63,9 @@ class StridedSliceShapeInfer : public ShapeInferEmptyPads { data_dependency.at(STRIDE_ID)->getDesc().getPrecision() != Precision::I32) { IE_THROW(Unexpected) << "The data type of begin/end/stride is NOT I32, which is unexpected!"; } - auto beginPtr = reinterpret_cast(data_dependency.at(BEGIN_ID)->GetPtr()); - auto endPtr = reinterpret_cast(data_dependency.at(END_ID)->GetPtr()); - auto stridePtr = reinterpret_cast(data_dependency.at(STRIDE_ID)->GetPtr()); + auto beginPtr = reinterpret_cast(data_dependency.at(BEGIN_ID)->GetData()); + auto endPtr = reinterpret_cast(data_dependency.at(END_ID)->GetData()); + auto stridePtr = reinterpret_cast(data_dependency.at(STRIDE_ID)->GetData()); for (size_t i = 0, new_idx = 0; i < shapeIn.size(); ++i) { if (m_new_axis_mask_set.count(i)) { @@ -505,7 +505,7 @@ void StridedSlice::StridedSliceCommonExecutor::paramsInitialization(const Stride const size_t nDims = std::max(inputRank, outputRank); auto fillingInParameters = [&](std::vector ¶meter, const size_t type, const size_t size, const int value) { - const int *ptr = reinterpret_cast(srcMemory[type]->GetPtr()); + const int *ptr = reinterpret_cast(srcMemory[type]->GetData()); parameter.assign(ptr, ptr + size); if (type != AXES_ID && params.attrs.ellipsisMaskCounter == 0 && size < nDims) { @@ -840,8 +840,8 @@ void StridedSlice::StridedSliceCommonExecutor::indicesCalculationForOptimized() } void 
StridedSlice::StridedSliceCommonExecutor::exec(const std::vector& srcMemory, const std::vector& dstMemory) { - const uint8_t* srcData = reinterpret_cast(srcMemory[0]->GetPtr()); - uint8_t* dstData = reinterpret_cast(dstMemory[0]->GetPtr()); + const uint8_t* srcData = reinterpret_cast(srcMemory[0]->GetData()); + uint8_t* dstData = reinterpret_cast(dstMemory[0]->GetData()); const uint8_t* srcShiftedData = srcData + srcShift; parallel_nt(nThreads, [&](const int ithr, const int nthr) { size_t start = 0, end = 0; diff --git a/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp b/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp index f57128a2443cab..5075e64ddff46f 100644 --- a/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp +++ b/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp @@ -297,8 +297,7 @@ MemoryPtr DynamicBuffer::create_buffer(const dnnl::engine& eng) { auto _descCreator = BlockedDescCreator::getCommonCreators().at(LayoutType::ncsp); auto new_buffer_desc = _descCreator->createSharedDesc(from->getDesc().getPrecision(), _shape); - auto _ptr = std::make_shared(eng); - _ptr->Create(*new_buffer_desc); + auto _ptr = std::make_shared(eng, new_buffer_desc); return _ptr; } @@ -313,7 +312,7 @@ void DynamicBuffer::move_buffer(const MemoryPtr& new_buffer) { const auto src_offset_in_byte = stride > 0 ? 0 : (src_stride - valid_size); chunk_offset_in_byte = stride > 0 ? 0 : (dst_stride - valid_size); // reset chunk_offset_in_byte - copy(reinterpret_cast(mem_holder_buffer->GetPtr()) + src_offset_in_byte, reinterpret_cast(new_buffer->GetPtr()) + chunk_offset_in_byte, + copy(reinterpret_cast(mem_holder_buffer->GetData()) + src_offset_in_byte, reinterpret_cast(new_buffer->GetData()) + chunk_offset_in_byte, src_stride, dst_stride, count, valid_size); // assign mem_holder_buffer @@ -332,7 +331,7 @@ void DynamicBuffer::move_data() { const auto src_stride = abs(map_rule.stride) * len; const auto dst_stride = chunk_stride_in_byte; - copy(reinterpret_cast(from->GetPtr()), reinterpret_cast(mem_holder_buffer->GetPtr()) + chunk_offset_in_byte, + copy(reinterpret_cast(from->GetData()), reinterpret_cast(mem_holder_buffer->GetData()) + chunk_offset_in_byte, src_stride, dst_stride, count, chunk_unit_in_byte); // adjust for next execution @@ -363,7 +362,7 @@ void DynamicBuffer::transfer(const Node* node) { const auto dst_stride = to.front()->getStaticDims()[axis] * len; const auto valid_size = chunk_unit_in_byte * num_execs; const auto src_offset_in_byte = stride > 0 ? 
0 : (src_stride - valid_size); - copy(reinterpret_cast(mem_holder_buffer->GetPtr()) + src_offset_in_byte, reinterpret_cast(to.front()->GetPtr()), + copy(reinterpret_cast(mem_holder_buffer->GetData()) + src_offset_in_byte, reinterpret_cast(to.front()->GetData()), src_stride, dst_stride, count, dst_stride); } else { VectorDims newDims = to.front()->GetShape().getDims(); @@ -521,8 +520,8 @@ void TensorIterator::createPrimitive() { bool TensorIterator::needPrepareParams() const { if (getAlgorithm() == Algorithm::TensorIteratorLoop) { - const auto tripCountPtr = reinterpret_cast(getParentEdgesAtPort(loopTripCountIdx).front()->getMemoryPtr()->GetPtr()); - const auto condPtr = reinterpret_cast(getParentEdgesAtPort(loopExecutionConditionIdx).front()->getMemoryPtr()->GetPtr()); + const auto tripCountPtr = reinterpret_cast(getParentEdgesAtPort(loopTripCountIdx).front()->getMemoryPtr()->GetData()); + const auto condPtr = reinterpret_cast(getParentEdgesAtPort(loopExecutionConditionIdx).front()->getMemoryPtr()->GetData()); if (tripCountPtr[0] != static_cast(lastUsedTripCount) || static_cast(condPtr[0]) != lastUsedCond) return true; } diff --git a/src/plugins/intel_cpu/src/nodes/tile.cpp b/src/plugins/intel_cpu/src/nodes/tile.cpp index 77be22d68276a3..55f3e276866f77 100644 --- a/src/plugins/intel_cpu/src/nodes/tile.cpp +++ b/src/plugins/intel_cpu/src/nodes/tile.cpp @@ -100,7 +100,7 @@ void Tile::prepareParams() { if (!constMap[TILE_REPEATS]) { const auto& repeatsMem = getParentEdgesAtPort(TILE_REPEATS)[0]->getMemory(); - const int32_t* repeatsData = reinterpret_cast(repeatsMem.GetPtr()); + const int32_t* repeatsData = reinterpret_cast(repeatsMem.GetData()); originRepeats.assign(repeatsData, repeatsData + repeatsMem.getStaticDims()[0]); repeats.assign(std::max(originRepeats.size(), getInputShapeAtPort(TILE_INPUT).getRank()), 1lu); @@ -124,7 +124,7 @@ bool Tile::needShapeInfer() const { if (!constMap[TILE_REPEATS]) { if (originRepeats.empty()) return true; - const int32_t* repeatsData = reinterpret_cast(getParentEdgesAtPort(TILE_REPEATS)[0]->getMemory().GetPtr()); + const int32_t* repeatsData = reinterpret_cast(getParentEdgesAtPort(TILE_REPEATS)[0]->getMemory().GetData()); for (size_t i = 0lu; i < originRepeats.size(); i++) { if (originRepeats[i] != static_cast(repeatsData[i])) return true; @@ -153,8 +153,8 @@ void Tile::plainExecute(dnnl::stream strm) { auto& srcMemory = getParentEdgeAt(TILE_INPUT)->getMemory(); - const uint8_t* src_ptr = reinterpret_cast(srcMemory.GetPtr()); - uint8_t* dst_ptr = reinterpret_cast(getChildEdgeAt(0)->getMemory().GetPtr()); + const uint8_t* src_ptr = reinterpret_cast(srcMemory.GetData()); + uint8_t* dst_ptr = reinterpret_cast(getChildEdgeAt(0)->getMemory().GetData()); int m_inner_dim = 1; int m_outer_dim = 1; diff --git a/src/plugins/intel_cpu/src/nodes/topk.cpp b/src/plugins/intel_cpu/src/nodes/topk.cpp index 939792f1bc2f02..e4ea3db60a29e8 100644 --- a/src/plugins/intel_cpu/src/nodes/topk.cpp +++ b/src/plugins/intel_cpu/src/nodes/topk.cpp @@ -1944,12 +1944,12 @@ void TopK::initSupportedPrimitiveDescriptors() { } bool TopK::needShapeInfer() const { - const int src_k = reinterpret_cast(getParentEdgeAt(TOPK_K)->getMemoryPtr()->GetPtr())[0]; + const int src_k = reinterpret_cast(getParentEdgeAt(TOPK_K)->getMemoryPtr()->GetData())[0]; return inputShapesModified() || src_k != top_k; } bool TopK::needPrepareParams() const { - const int src_k = reinterpret_cast(getParentEdgeAt(TOPK_K)->getMemoryPtr()->GetPtr())[0]; + const int src_k = 
reinterpret_cast(getParentEdgeAt(TOPK_K)->getMemoryPtr()->GetData())[0]; return inputShapesModified() || top_k != src_k; } @@ -1994,14 +1994,14 @@ void TopK::prepareParams() { dst_dims = dstMemPtr->getDesc().getShape().getDims(); if (isDynamicNode()) { - const int src_k = reinterpret_cast(getParentEdgeAt(TOPK_K)->getMemoryPtr()->GetPtr())[0]; + const int src_k = reinterpret_cast(getParentEdgeAt(TOPK_K)->getMemoryPtr()->GetData())[0]; if (static_cast(src_k) > src_dims[axis]) IE_THROW() << errorPrefix << " gets top_k out of range!"; if (top_k != src_k) { top_k = src_k; } } else { - top_k = reinterpret_cast(getParentEdgeAt(TOPK_K)->getMemoryPtr()->GetPtr())[0]; + top_k = reinterpret_cast(getParentEdgeAt(TOPK_K)->getMemoryPtr()->GetData())[0]; } if (jit_mode) { @@ -2141,9 +2141,9 @@ void TopK::execute(dnnl::stream strm) { auto dstMemPtr = getChildEdgeAt(TOPK_DATA)->getMemoryPtr(); auto dstIndexesMemPtr = getChildEdgeAt(TOPK_INDEX)->getMemoryPtr(); - const uint8_t *src_data = reinterpret_cast(srcMemPtr->GetPtr()); - uint8_t *dst_data = reinterpret_cast(dstMemPtr->GetPtr()); - uint8_t *dst_idx = reinterpret_cast(dstIndexesMemPtr->GetPtr()); + const uint8_t *src_data = reinterpret_cast(srcMemPtr->GetData()); + uint8_t *dst_data = reinterpret_cast(dstMemPtr->GetData()); + uint8_t *dst_idx = reinterpret_cast(dstIndexesMemPtr->GetData()); if (jit_mode) { topk_process(src_data, dst_data, dst_idx); diff --git a/src/plugins/intel_cpu/src/nodes/transpose.cpp b/src/plugins/intel_cpu/src/nodes/transpose.cpp index dd87e98e7d4f7f..84cd476db7950d 100644 --- a/src/plugins/intel_cpu/src/nodes/transpose.cpp +++ b/src/plugins/intel_cpu/src/nodes/transpose.cpp @@ -216,7 +216,7 @@ void Transpose::prepareParams() { params.dst_block_dims = dstDesc->getBlockDims(); if (!isInputOrderConst) { - auto orderPtr = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); + auto orderPtr = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetData()); auto orderLen = getParentEdgeAt(0)->getMemoryPtr()->GetSize(); params.order.assign(orderPtr, orderPtr + orderLen); } @@ -275,8 +275,8 @@ void Transpose::createPrimitive() { template static void transpose_to_0312(const int MB, const MemoryPtr& srcMemPtr, MemoryPtr& dstMemPtr) { - const auto src_data = reinterpret_cast(srcMemPtr->GetPtr()); - auto dst_data = reinterpret_cast(dstMemPtr->GetPtr()); + const auto src_data = reinterpret_cast(srcMemPtr->GetData()); + auto dst_data = reinterpret_cast(dstMemPtr->GetData()); const int DIM1 = srcMemPtr->getStaticDims()[1]; const int DIM2 = srcMemPtr->getStaticDims()[2]; @@ -300,8 +300,8 @@ static void transpose_to_0312(const int MB, const MemoryPtr& srcMemPtr, MemoryPt template static void transpose_to_04123(const int MB, const MemoryPtr& srcMemPtr, MemoryPtr& dstMemPtr) { - const auto src_data = reinterpret_cast(srcMemPtr->GetPtr()); - auto dst_data = reinterpret_cast(dstMemPtr->GetPtr()); + const auto src_data = reinterpret_cast(srcMemPtr->GetData()); + auto dst_data = reinterpret_cast(dstMemPtr->GetData()); const int DIM1 = srcMemPtr->getStaticDims()[1]; const int DIM2 = srcMemPtr->getStaticDims()[2]; @@ -328,8 +328,8 @@ static void transpose_to_04123(const int MB, const MemoryPtr& srcMemPtr, MemoryP template static void transpose_to_051234(const int MB, const MemoryPtr& srcMemPtr, MemoryPtr& dstMemPtr) { - const auto src_data = reinterpret_cast(srcMemPtr->GetPtr()); - auto dst_data = reinterpret_cast(dstMemPtr->GetPtr()); + const auto src_data = reinterpret_cast(srcMemPtr->GetData()); + auto dst_data = 
reinterpret_cast(dstMemPtr->GetData()); const int DIM1 = srcMemPtr->getStaticDims()[1]; const int DIM2 = srcMemPtr->getStaticDims()[2]; @@ -401,8 +401,8 @@ void Transpose::TransposeJitExecutor::exec(Transpose* node, MemoryPtr& srcMemPtr if (!pKernel) IE_THROW() << "Could not execute. Kernel for Transpose node was not compiled."; - const uint8_t* srcData = reinterpret_cast(srcMemPtr->GetPtr()); - uint8_t* dstData = reinterpret_cast(dstMemPtr->GetPtr()); + const uint8_t* srcData = reinterpret_cast(srcMemPtr->GetData()); + uint8_t* dstData = reinterpret_cast(dstMemPtr->GetData()); pKernel->execute(srcData, dstData, MB); } diff --git a/src/plugins/intel_cpu/src/nodes/unique.cpp b/src/plugins/intel_cpu/src/nodes/unique.cpp index 7cf6853f3e4565..9d1b0c8ee22d99 100644 --- a/src/plugins/intel_cpu/src/nodes/unique.cpp +++ b/src/plugins/intel_cpu/src/nodes/unique.cpp @@ -164,7 +164,7 @@ void Unique::executeDynamicImpl(dnnl::stream strm) { template void Unique::flattenTensorExec() { - const T* srcDataPtr = reinterpret_cast(getParentEdgeAt(IN_DATA)->getMemoryPtr()->GetPtr()); + const T* srcDataPtr = reinterpret_cast(getParentEdgeAt(IN_DATA)->getMemoryPtr()->GetData()); const size_t inputLen = getParentEdgeAt(IN_DATA)->getMemoryPtr()->GetSize() / sizeof(T); std::vector uniDataTmp(inputLen); auto uniDataTmpPtr = uniDataTmp.data(); @@ -263,25 +263,25 @@ void Unique::flattenTensorExec() { redefineOutputMemory({ {uniqueLen}, {uniqueLen}, {inputLen}, {uniqueLen}}); - T* uniDataPtr = reinterpret_cast(getChildEdgesAtPort(UNIQUE_DATA)[0]->getMemoryPtr()->GetPtr()); + T* uniDataPtr = reinterpret_cast(getChildEdgesAtPort(UNIQUE_DATA)[0]->getMemoryPtr()->GetData()); memcpy(uniDataPtr, uniDataTmpPtr, uniqueLen * sizeof(T)); if (definedOutputs[FIRST_UNIQUE_IDX]) { - int *firstPtr = reinterpret_cast(getChildEdgesAtPort(FIRST_UNIQUE_IDX)[0]->getMemoryPtr()->GetPtr()); + int *firstPtr = reinterpret_cast(getChildEdgesAtPort(FIRST_UNIQUE_IDX)[0]->getMemoryPtr()->GetData()); memcpy(firstPtr, firstUniTmp.data(), uniqueLen * sizeof(int)); } if (definedOutputs[INPUT_TO_UNIQ_IDX]) { - auto inToOutPtr = reinterpret_cast(getChildEdgesAtPort(INPUT_TO_UNIQ_IDX)[0]->getMemoryPtr()->GetPtr()); + auto inToOutPtr = reinterpret_cast(getChildEdgesAtPort(INPUT_TO_UNIQ_IDX)[0]->getMemoryPtr()->GetData()); memcpy(inToOutPtr, inToOutTmp.data(), inputLen * sizeof(int)); } if (definedOutputs[OCCURRENCES_NUM]) { - auto occurPtr = reinterpret_cast(getChildEdgesAtPort(OCCURRENCES_NUM)[0]->getMemoryPtr()->GetPtr()); + auto occurPtr = reinterpret_cast(getChildEdgesAtPort(OCCURRENCES_NUM)[0]->getMemoryPtr()->GetData()); memcpy(occurPtr, occurTmp.data(), uniqueLen * sizeof(int)); } } template void Unique::slicedTensorExec() { - const T* srcDataPtr = reinterpret_cast(getParentEdgeAt(IN_DATA)->getMemoryPtr()->GetPtr()); + const T* srcDataPtr = reinterpret_cast(getParentEdgeAt(IN_DATA)->getMemoryPtr()->GetData()); const size_t inputLen = getParentEdgeAt(IN_DATA)->getMemoryPtr()->GetSize() / sizeof(T); std::vector uniDataTmp(inputLen); auto uniDataTmpPtr = uniDataTmp.data(); @@ -478,18 +478,18 @@ void Unique::slicedTensorExec() { dstDataShape[axis] = uniqueLen; redefineOutputMemory({ dstDataShape, {uniqueLen}, {cmpBlNum}, {uniqueLen}}); - T* uniDataPtr = reinterpret_cast(getChildEdgesAtPort(UNIQUE_DATA)[0]->getMemoryPtr()->GetPtr()); + T* uniDataPtr = reinterpret_cast(getChildEdgesAtPort(UNIQUE_DATA)[0]->getMemoryPtr()->GetData()); memcpy(uniDataPtr, uniDataTmpPtr, getChildEdgesAtPort(UNIQUE_DATA)[0]->getMemoryPtr()->GetSize()); if 
(definedOutputs[FIRST_UNIQUE_IDX]) { - int *firstPtr = reinterpret_cast(getChildEdgesAtPort(FIRST_UNIQUE_IDX)[0]->getMemoryPtr()->GetPtr()); + int *firstPtr = reinterpret_cast(getChildEdgesAtPort(FIRST_UNIQUE_IDX)[0]->getMemoryPtr()->GetData()); memcpy(firstPtr, firstUniTmp.data(), uniqueLen * sizeof(int)); } if (definedOutputs[INPUT_TO_UNIQ_IDX]) { - auto inToOutPtr = reinterpret_cast(getChildEdgesAtPort(INPUT_TO_UNIQ_IDX)[0]->getMemoryPtr()->GetPtr()); + auto inToOutPtr = reinterpret_cast(getChildEdgesAtPort(INPUT_TO_UNIQ_IDX)[0]->getMemoryPtr()->GetData()); memcpy(inToOutPtr, inToOutTmp.data(), cmpBlNum * sizeof(int)); } if (definedOutputs[OCCURRENCES_NUM]) { - auto occurPtr = reinterpret_cast(getChildEdgesAtPort(OCCURRENCES_NUM)[0]->getMemoryPtr()->GetPtr()); + auto occurPtr = reinterpret_cast(getChildEdgesAtPort(OCCURRENCES_NUM)[0]->getMemoryPtr()->GetData()); memcpy(occurPtr, occurTmp.data(), uniqueLen * sizeof(int)); } } diff --git a/src/plugins/intel_cpu/src/utils/blob_dump.cpp b/src/plugins/intel_cpu/src/utils/blob_dump.cpp index 2cbc619c7dbb1b..4ea9d8d1f61970 100644 --- a/src/plugins/intel_cpu/src/utils/blob_dump.cpp +++ b/src/plugins/intel_cpu/src/utils/blob_dump.cpp @@ -94,7 +94,7 @@ void BlobDumper::prepare_plain_data(const MemoryPtr &memory, std::vector(memory->GetPtr()), size); + cpu_memcpy(data.data(), reinterpret_cast(memory->GetData()), size); return; } diff --git a/src/plugins/intel_cpu/src/utils/blob_dump.h b/src/plugins/intel_cpu/src/utils/blob_dump.h index 6f90160cae0b82..9ab407b3cd94be 100644 --- a/src/plugins/intel_cpu/src/utils/blob_dump.h +++ b/src/plugins/intel_cpu/src/utils/blob_dump.h @@ -29,8 +29,7 @@ class BlobDumper { BlobDumper() = default; BlobDumper(const DnnlBlockedMemoryDesc &desc) { dnnl::engine eng(dnnl::engine::kind::cpu, 0); - memory = std::make_shared(eng); - memory->Create(desc); + memory = std::make_shared(eng, desc); } BlobDumper(const BlobDumper&) = default; BlobDumper& operator = (BlobDumper&&) = default; @@ -47,7 +46,7 @@ class BlobDumper { void dumpAsTxt(std::ostream &stream) const; void *getDataPtr() const { - return memory->GetPtr(); + return memory->GetData(); } }; diff --git a/src/plugins/intel_cpu/src/utils/node_dumper.cpp b/src/plugins/intel_cpu/src/utils/node_dumper.cpp index 3043a3e40edd40..44fdc67e8e46ce 100644 --- a/src/plugins/intel_cpu/src/utils/node_dumper.cpp +++ b/src/plugins/intel_cpu/src/utils/node_dumper.cpp @@ -110,8 +110,7 @@ static void dumpInternalBlobs(const NodePtr& node, const DebugCapsConfig& config if (desc.getPrecision() == Precision::BIN) continue; - MemoryPtr memory = std::make_shared(node->getEngine()); - memory->Create(MemoryDescUtils::convertToDnnlBlockedMemoryDesc(desc), blb->buffer()); + MemoryPtr memory = std::make_shared(node->getEngine(), MemoryDescUtils::convertToDnnlBlockedMemoryDesc(desc), blb->buffer()); BlobDumper dumper(memory); dump(dumper, dump_file, config); } diff --git a/src/plugins/intel_cpu/src/utils/shape_inference/shape_inference_ngraph.cpp b/src/plugins/intel_cpu/src/utils/shape_inference/shape_inference_ngraph.cpp index ef325b22a7dc86..f6744721977bca 100644 --- a/src/plugins/intel_cpu/src/utils/shape_inference/shape_inference_ngraph.cpp +++ b/src/plugins/intel_cpu/src/utils/shape_inference/shape_inference_ngraph.cpp @@ -36,7 +36,7 @@ NgraphShapeInfer::infer( input_values[port] = std::make_shared( InferenceEngine::details::convertPrecision(memPtr->getDesc().getPrecision()), shape, - memPtr->GetPtr()); + memPtr->GetData()); } } // call shape inference API diff --git 
a/src/plugins/intel_cpu/src/weights_cache.hpp b/src/plugins/intel_cpu/src/weights_cache.hpp index 3555dee9e0a8b1..dbc674213348ce 100644 --- a/src/plugins/intel_cpu/src/weights_cache.hpp +++ b/src/plugins/intel_cpu/src/weights_cache.hpp @@ -63,7 +63,7 @@ class WeightsSharing { {} std::mutex guard; - std::weak_ptr sharedMemory; + std::weak_ptr sharedMemory; std::atomic valid; }; From b33eb559d1afff91d21883b1d27b3dc05e3cde6b Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Thu, 1 Jun 2023 18:34:27 +0200 Subject: [PATCH 14/28] Allow NotAllocated edges call allocate --- src/plugins/intel_cpu/src/edge.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/plugins/intel_cpu/src/edge.cpp b/src/plugins/intel_cpu/src/edge.cpp index 97a86b36bdfa6f..44f0a5e9a8bf44 100644 --- a/src/plugins/intel_cpu/src/edge.cpp +++ b/src/plugins/intel_cpu/src/edge.cpp @@ -253,9 +253,6 @@ int Edge::getOutputNum() const { } void Edge::allocateCommon(const std::function& allocate) { - if (status != Status::NeedAllocation) - return; - if (memoryPtr) IE_THROW() << "Unexpected behaviour: status == NeedAllocation but memory is already allocated."; From 0cea5f82534043148123818949e7f68c3245c701 Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Fri, 2 Jun 2023 19:29:51 +0200 Subject: [PATCH 15/28] IMemory fixes after merge --- src/plugins/intel_cpu/src/cpu_memory.cpp | 98 ++++++++++--------- src/plugins/intel_cpu/src/cpu_memory.h | 86 +++++----------- .../intel_cpu/src/dnnl_postops_composer.cpp | 3 - src/plugins/intel_cpu/src/dnnl_scratch_pad.h | 3 +- src/plugins/intel_cpu/src/edge.cpp | 56 +++-------- src/plugins/intel_cpu/src/edge.h | 3 +- src/plugins/intel_cpu/src/node.cpp | 8 +- src/plugins/intel_cpu/src/nodes/concat.cpp | 4 +- src/plugins/intel_cpu/src/nodes/conv.cpp | 9 +- src/plugins/intel_cpu/src/nodes/eltwise.cpp | 2 +- .../intel_cpu/src/nodes/fake_quantize.cpp | 2 +- src/plugins/intel_cpu/src/nodes/gather.cpp | 4 +- src/plugins/intel_cpu/src/nodes/input.cpp | 6 +- src/plugins/intel_cpu/src/nodes/reorder.cpp | 2 +- src/plugins/intel_cpu/src/nodes/split.cpp | 4 +- .../intel_cpu/src/nodes/tensoriterator.cpp | 3 +- .../tests/unit/nodes/reorder_node_test.cpp | 6 +- 17 files changed, 116 insertions(+), 183 deletions(-) diff --git a/src/plugins/intel_cpu/src/cpu_memory.cpp b/src/plugins/intel_cpu/src/cpu_memory.cpp index f8c3cde89e3f98..46e5b4fd88567e 100644 --- a/src/plugins/intel_cpu/src/cpu_memory.cpp +++ b/src/plugins/intel_cpu/src/cpu_memory.cpp @@ -34,15 +34,26 @@ namespace { } } // namespace -Memory::Memory(const dnnl::engine& eng, MemoryDescPtr _pMemDesc, const void* data, bool pads_zeroing) : - eng(eng), mgrHandle(std::make_shared(std::unique_ptr(new MemoryMngrWithReuse())), this), dnnlMemHandle(this), IMemory(_pMemDesc) {Create(pMemDesc, data, pads_zeroing);} -Memory::Memory(const dnnl::engine& eng, const MemoryDesc& _MemDesc, const void* data, bool pads_zeroing) : - eng(eng), mgrHandle(std::make_shared(std::unique_ptr(new MemoryMngrWithReuse())), this), dnnlMemHandle(this), IMemory(_MemDesc.clone()) {Create(pMemDesc, data, pads_zeroing);} +Memory::Memory(const dnnl::engine& eng, MemoryDescPtr desc, const void* data, bool pads_zeroing) : + eng(eng), + mgrHandle(std::make_shared(std::unique_ptr(new MemoryMngrWithReuse())), this), + dnnlMemHandle(this), + pMemDesc(desc) { + Create(pMemDesc, data, pads_zeroing); + } + +Memory::Memory(const dnnl::engine& eng, const MemoryDesc& desc, const void* data, bool pads_zeroing) : + Memory::Memory(eng, desc.clone(), data, pads_zeroing) {} -Memory::Memory(const 
dnnl::engine& eng, std::unique_ptr mngr, MemoryDescPtr _pMemDesc) : - eng(eng), mgrHandle(std::make_shared(std::move(mngr)), this), dnnlMemHandle(this), IMemory(_pMemDesc) { Create(_pMemDesc, mgrHandle.get());} -Memory::Memory(const dnnl::engine& eng, std::unique_ptr mngr, const MemoryDesc& _MemDesc) : - eng(eng), mgrHandle(std::make_shared(std::move(mngr)), this), dnnlMemHandle(this), IMemory(_MemDesc.clone()) { Create(pMemDesc, mgrHandle.get());} +Memory::Memory(const dnnl::engine& eng, MemoryDescPtr desc, MemoryMngrPtr mngr) : + eng(eng), pMemDesc(desc), mgrHandle(mngr, this), dnnlMemHandle(this) { + bool memAllocated = mgrHandle->getRawPtr(); + + Create(desc, nullptr, !memAllocated); + } + +Memory::Memory(const dnnl::engine& eng, const MemoryDesc& desc, MemoryMngrPtr mngr) : + Memory::Memory(eng, desc.clone(), mngr) {} size_t Memory::GetSize() const { auto size = getDesc().getCurrentMemSize(); @@ -99,39 +110,23 @@ void Memory::FillZero() { memset(dataPtr, 0, getDesc().getCurrentMemSize()); } -// void *Memory::GetPtr() const { -// auto ptr = static_cast(GetData()); -// ptr += pMemDesc->getOffsetPadding() * pMemDesc->getPrecision().size(); -// return ptr; -// } - -void Memory::redefineDesc(MemoryDescPtr desc, const void* data, bool pads_zeroing) { +void Memory::redefineDesc(MemoryDescPtr desc) { if (!desc->hasDefinedMaxSize()) { IE_THROW() << "Can not reset descriptor, memory upper bound is unknown."; } - this->Create(desc, data, pads_zeroing); // nullptr, false + this->Create(desc, nullptr, false); } template<> DnnlMemoryDescPtr IMemory::GetDescWithType() const { - return MemoryDescUtils::convertToDnnlMemoryDesc(pMemDesc); + return MemoryDescUtils::convertToDnnlMemoryDesc(getDescPtr()); } -// void Memory::setDataHandle(void *data) { -// if (!mgrHandle->hasExtBuffer()) { -// mgrHandle = DnnlMemMngrHandle( -// std::make_shared(std::unique_ptr(new MemoryMngrWithReuse())), -// this); -// } - -// size_t maxMemSize = pMemDesc->isDefined() ? 
pMemDesc->getCurrentMemSize() : 0; -// mgrHandle->setExtBuff(data, maxMemSize); -// if (dnnlMemHandle.isInit()) { -// auto prim = dnnlMemHandle.getPrim(); -// prim.set_data_handle(mgrHandle->getRawPtr()); // for pads zeroing, to preserve dnnl::memory::set_data_handle behaviour -// } -// } +template<> +BlockedMemoryDescPtr IMemory::GetDescWithType() const { + return MemoryDescUtils::convertToBlockedMemoryDesc(getDescPtr()); +} void Memory::update() { if (dnnlMemHandle.isInit()) { @@ -140,22 +135,6 @@ void Memory::update() { } } -void Memory::Create(const MemoryDesc &desc, MemoryMngrPtr memMgr) { - Create(desc.clone(), memMgr); -} - -void Memory::Create(MemoryDescPtr desc, MemoryMngrPtr memMgr) { - mgrHandle = DnnlMemMngrHandle(memMgr, this); - bool memAllocated = mgrHandle->getRawPtr(); - - Create(desc, nullptr, !memAllocated); -} - -template<> -BlockedMemoryDescPtr IMemory::GetDescWithType() const { - return MemoryDescUtils::convertToBlockedMemoryDesc(pMemDesc); -} - dnnl::memory Memory::GetPrimitive() const { return dnnlMemHandle.getPrim(); } @@ -195,6 +174,31 @@ dnnl::memory Memory::DnnlMemPrimHandle::getPrim() const { return m_prim; } +bool Memory::isAllocated() const noexcept { + if (mgrHandle->getRawPtr()) { + return true; + } + if (!pMemDesc) { + return false; + } + if (!(pMemDesc->isDefined())) { + return true; + } + if (pMemDesc->getCurrentMemSize() == 0) { + return true; + } + return false; +} + +void* Memory::GetData() const { + void* data = getDataNoThrow(); + if (data == nullptr && + pMemDesc->getShape().isStatic() && + pMemDesc->getShape().getElementsCount() != 0) + IE_THROW() << "Memory has not been allocated"; + return data; +} + void* MemoryMngrWithReuse::getRawPtr() const noexcept { return _data.get(); } diff --git a/src/plugins/intel_cpu/src/cpu_memory.h b/src/plugins/intel_cpu/src/cpu_memory.h index 872aa72078831b..92d839a5f0593f 100644 --- a/src/plugins/intel_cpu/src/cpu_memory.h +++ b/src/plugins/intel_cpu/src/cpu_memory.h @@ -161,8 +161,7 @@ class DnnlMemMngrHandle { class IMemory { public: - virtual dnnl::memory GetPrimitive() const = 0; // that might be a pain in the neck, but we still have to support it - virtual dnnl::memory::data_type GetDataType() const = 0; // still better than downcast + virtual ~IMemory() = default; virtual bool isAllocated() const noexcept = 0; @@ -172,40 +171,36 @@ class IMemory { virtual void* GetData() const = 0; // pointer to the actual memory virtual size_t GetSize() const = 0; // in bytes - virtual const Shape& GetShape() const = 0; + virtual const VectorDims& getStaticDims() const = 0; // Redefines descriptor. The memory descriptor will be replaced with the new one. // Memory will not be reallocated if the new tensor size is less or equal the upper bound. // Caution!!! This action invalidates the previous data layout. The old data may become unreachable. 
- virtual void redefineDesc(MemoryDescPtr desc, const void* data = nullptr, bool pads_zeroing = false) = 0; + virtual void redefineDesc(MemoryDescPtr desc) = 0; virtual void SetData(const IMemory& memory, bool ftz = true) const = 0; - virtual void FillZero() = 0; - virtual const VectorDims& getStaticDims() const = 0; + virtual MemoryMngrPtr getMemoryMngr() const = 0; - virtual bool isUsedExternalStorage() const = 0; + //oneDNN specifics for backward compatibility + virtual dnnl::memory GetPrimitive() const = 0; + virtual dnnl::memory::data_type GetDataType() const = 0; + + virtual void FillZero() = 0; - virtual MemoryMngrPtr getMemoryMngr() const = 0; // returns nullptr when has nothing to return - template ::value && !std::is_reference::value, int>::type = 0, typename std::enable_if::value, int>::type = 0> - std::shared_ptr GetDescWithType() const; // the only not pure method, since it exploits a static polymorphism. Should call getDesc and type cast internally - -protected: - IMemory() = delete; - IMemory(MemoryDescPtr _pMemDesc) : pMemDesc(_pMemDesc) {}; - MemoryDescPtr pMemDesc; + std::shared_ptr GetDescWithType() const; }; class Memory : public IMemory { public: - explicit Memory(const dnnl::engine& eng, MemoryDescPtr pMemDesc, const void* data = nullptr, bool pads_zeroing = true); - explicit Memory(const dnnl::engine& eng, const MemoryDesc& MemDesc, const void* data = nullptr, bool pads_zeroing = true); - Memory(const dnnl::engine& eng, std::unique_ptr mngr, MemoryDescPtr pMemDesc); - Memory(const dnnl::engine& eng, std::unique_ptr mngr, const MemoryDesc& MemDesc); + Memory(const dnnl::engine& eng, MemoryDescPtr desc, const void* data = nullptr, bool pads_zeroing = true); + Memory(const dnnl::engine& eng, const MemoryDesc& desc, const void* data = nullptr, bool pads_zeroing = true); + Memory(const dnnl::engine& eng, MemoryDescPtr desc, MemoryMngrPtr mngr); + Memory(const dnnl::engine& eng, const MemoryDesc& desc, MemoryMngrPtr mbgr); Memory(const Memory&) = delete; Memory& operator= (const Memory&) = delete; @@ -215,21 +210,7 @@ class Memory : public IMemory { dnnl::memory GetPrimitive() const override; - bool isAllocated() const noexcept override { - if (mgrHandle->getRawPtr()) { - return true; - } - if (!pMemDesc) { - return false; - } - if (!(pMemDesc->isDefined())) { - return true; - } - if (pMemDesc->getCurrentMemSize() == 0) { - return true; - } - return false; - } + bool isAllocated() const noexcept override; const MemoryDesc& getDesc() const override { return *pMemDesc; @@ -239,23 +220,7 @@ class Memory : public IMemory { return pMemDesc; } - // template ::value && !std::is_reference::value, int>::type = 0, - // typename std::enable_if::value, int>::type = 0> - // std::shared_ptr GetDescWithType() const; - - /** - * Return handler of buffer. Real data may starts from some other offset - * @return - */ - void* GetData() const override { - void* data = getDataNoThrow(); - if (data == nullptr && - pMemDesc->getShape().isStatic() && - pMemDesc->getShape().getElementsCount() != 0) - IE_THROW() << "Memory has not been allocated"; - return data; - } + void* GetData() const override; dnnl::memory::data_type GetDataType() const override { return DnnlExtensionUtils::IEPrecisionToDataType(getDesc().getPrecision()); @@ -267,26 +232,22 @@ class Memory : public IMemory { return getDesc().getShape(); } + const VectorDims& getStaticDims() const override { + return getDesc().getShape().getStaticDims(); + } + // Redefines descriptor. The memory descriptor will be replaced with the new one. 
// Memory will not be reallocated if the new tensor size is less or equal the upper bound. // Caution!!! This action invalidates the previous data layout. The old data may become unreachable. - void redefineDesc(MemoryDescPtr desc, const void* data = nullptr, bool pads_zeroing = false) override; + void redefineDesc(MemoryDescPtr desc) override; void SetData(const IMemory& memory, bool ftz = true) const override; void FillZero() override; - const VectorDims& getStaticDims() const override { - return getDesc().getShape().getStaticDims(); - } - dnnl::engine getEngine() const { return eng; } - bool isUsedExternalStorage() const override { - return mgrHandle->hasExtBuffer(); - } - MemoryMngrPtr getMemoryMngr() const override { return mgrHandle.get(); } @@ -300,11 +261,8 @@ class Memory : public IMemory { void Create(const MemoryDesc& desc, const void* data = nullptr, bool pads_zeroing = true); void Create(MemoryDescPtr desc, const void* data = nullptr, bool pads_zeroing = true); - void Create(const MemoryDesc& desc, MemoryMngrPtr memMgr); - void Create(MemoryDescPtr desc, MemoryMngrPtr memMgr); - private: - // MemoryDescPtr pMemDesc; + MemoryDescPtr pMemDesc; dnnl::engine eng; DnnlMemMngrHandle mgrHandle; bool padsZeroing = true; diff --git a/src/plugins/intel_cpu/src/dnnl_postops_composer.cpp b/src/plugins/intel_cpu/src/dnnl_postops_composer.cpp index f544e7a93fcb8e..f5bbcbb0298a20 100644 --- a/src/plugins/intel_cpu/src/dnnl_postops_composer.cpp +++ b/src/plugins/intel_cpu/src/dnnl_postops_composer.cpp @@ -60,7 +60,6 @@ void DnnlPostOpsComposer::updateWeiScales() { DnnlBlockedMemoryDesc memoryDesc(InferenceEngine::Precision::FP32, Shape({wei_scale_values.size()})); auto mem = std::make_shared(engine, memoryDesc); - // mem->Create(memoryDesc); memcpy(mem->GetData(), wei_scale_values.data(), wei_scale_values.size() * sizeof(float)); args[DNNL_ARG_ATTR_SCALES | DNNL_ARG_WEIGHTS] = mem; } @@ -74,7 +73,6 @@ void DnnlPostOpsComposer::updateDestScales() { DnnlBlockedMemoryDesc memoryDesc(InferenceEngine::Precision::FP32, Shape({1})); auto mem = std::make_shared(engine, memoryDesc); - // mem->Create(memoryDesc); memcpy(mem->GetData(), &dst_scale_val, sizeof(float)); args[DNNL_ARG_ATTR_SCALES | DNNL_ARG_DST] = mem; } @@ -93,7 +91,6 @@ void DnnlPostOpsComposer::appendBinary(const dnnl::algorithm alg, const std::vec // copy the data as args auto mem = std::make_shared(engine, memoryDesc); - // mem->Create(memoryDesc); memcpy(mem->GetData(), data.data(), data.size() * sizeof(float)); args[DNNL_ARG_ATTR_MULTIPLE_POST_OP(ops.len() - 1) | DNNL_ARG_SRC_1] = mem; } diff --git a/src/plugins/intel_cpu/src/dnnl_scratch_pad.h b/src/plugins/intel_cpu/src/dnnl_scratch_pad.h index b6b007bc8b35ca..33d729bb2c970d 100644 --- a/src/plugins/intel_cpu/src/dnnl_scratch_pad.h +++ b/src/plugins/intel_cpu/src/dnnl_scratch_pad.h @@ -23,8 +23,7 @@ class DnnlScratchPad { } MemoryPtr createScratchPadMem(const MemoryDescPtr& md) { - auto mem = std::make_shared(eng, std::unique_ptr(mgrPtr.get()), md); - // mem->Create(md, mgrPtr); + auto mem = std::make_shared(eng, md, mgrPtr); return mem; } }; diff --git a/src/plugins/intel_cpu/src/edge.cpp b/src/plugins/intel_cpu/src/edge.cpp index 1d30f9db4c0a6c..41d8b92f6d896b 100644 --- a/src/plugins/intel_cpu/src/edge.cpp +++ b/src/plugins/intel_cpu/src/edge.cpp @@ -236,10 +236,8 @@ Edge::ReorderStatus Edge::needReorder() { } void Edge::reuse(MemoryPtr ptr) { - if (status != Status::NeedAllocation) - return; memoryPtr = ptr; - status = Status::Allocated; + changeStatus(Status::Allocated); 
DEBUG_LOG(*this, " memoryPtr=", memoryPtr); } @@ -252,7 +250,7 @@ int Edge::getOutputNum() const { return child_port; } -void Edge::allocateCommon(const std::function& allocate) { +void Edge::allocateCommon(const std::function& allocate) { if (memoryPtr) IE_THROW() << "Unexpected behaviour: status == NeedAllocation but memory is already allocated."; @@ -261,15 +259,15 @@ void Edge::allocateCommon(const std::functiongetEngine(), std::unique_ptr(memMngr.get()), inputDesc)); - // }; - - // allocateCommon(allocateFunc); - - if (status != Status::NeedAllocation) - return; - - if (memoryPtr) - IE_THROW() << "Unexpected behaviour: status == NeedAllocation but memory is already allocated."; - - auto& inputDesc = getInputDesc(); - auto& outputDesc = getOutputDesc(); - if (!inputDesc.isCompatible(outputDesc)) - IE_THROW() << "Cannot allocate memory for incompatible descriptors."; + auto allocateFunc = [=](const MemoryDesc& inputDesc) -> MemoryPtr { + auto parentPtr = getParent(); + return std::make_shared(parentPtr->getEngine(), inputDesc, memMngr); + }; - auto parentPtr = getParent(); - memoryPtr.reset(new Memory(parentPtr->getEngine(), std::unique_ptr(memMngr.get()), inputDesc)); - - DEBUG_LOG(*this, " memoryPtr=", memoryPtr); - status = Status::Allocated; + allocateCommon(allocateFunc); } std::string Edge::name() const { @@ -343,11 +324,14 @@ void Edge::changeStatus(Edge::Status state) { if (state == Status::Validated) { IE_THROW() << "Incorrect behaviour! Use method validate()"; } - if (status != Status::Uninitialized && state == Status::NeedAllocation) + if (Status::Validated == this->status) { + IE_THROW() << "Unexpected attempt of memory change on edge: " << name(); + } + if (this->status != Status::Uninitialized && state == Status::NeedAllocation) return; - if (status == Status::NotAllocated) + if (this->status == Status::NotAllocated) memoryFromEdge.reset(); - status = state; + this->status = state; } PortDescBaseCPtr Edge::getInputPortDesc() const { @@ -433,14 +417,6 @@ MemoryPtr Edge::getMemoryPtr() const { return memoryPtr; } -void Edge::resetMemoryPtr(MemoryPtr mem) { - if (status == Status::NotAllocated) { - memoryFromEdge.reset(); - } - memoryPtr = mem; - changeStatus(Status::Allocated); -} - void Edge::sharedMemFrom(const EdgePtr &edge) { memoryFromEdge = edge; DEBUG_LOG(*this, " sharedMemFrom ", *edge); diff --git a/src/plugins/intel_cpu/src/edge.h b/src/plugins/intel_cpu/src/edge.h index c00fd371c17990..0317138bedab71 100644 --- a/src/plugins/intel_cpu/src/edge.h +++ b/src/plugins/intel_cpu/src/edge.h @@ -65,7 +65,6 @@ class Edge { const IMemory& getMemory(); MemoryPtr getMemoryPtr() const; - void resetMemoryPtr(MemoryPtr mem); ReorderStatus needReorder(); bool isDropped() const; @@ -108,7 +107,7 @@ class Edge { void collectConsumers(std::vector>& result) const; EdgePtr getBaseEdge(int look = LOOK_BOTH); - void allocateCommon(const std::function& allocate); + void allocateCommon(const std::function& allocate); friend class Graph; }; diff --git a/src/plugins/intel_cpu/src/node.cpp b/src/plugins/intel_cpu/src/node.cpp index 65bb363a141c06..8afc47986eaeb5 100644 --- a/src/plugins/intel_cpu/src/node.cpp +++ b/src/plugins/intel_cpu/src/node.cpp @@ -381,8 +381,8 @@ void Node::resolveInPlaceEdges(Edge::LOOK look) { auto baseMemMngr = getChildEdgesAtPort(inplaceOutIndx)[0]->getMemory().getMemoryMngr(); auto memMngr = std::make_shared(baseMemMngr); - auto newMem = std::make_shared(getEngine(), std::unique_ptr(memMngr.get()), selected_pd->getConfig().inConfs[i].getMemDesc()); - 
parentEdge->resetMemoryPtr(newMem); + auto newMem = std::make_shared(getEngine(), selected_pd->getConfig().inConfs[i].getMemDesc(), memMngr); + parentEdge->reuse(newMem); } } if (look & Edge::LOOK_UP) { @@ -399,8 +399,8 @@ void Node::resolveInPlaceEdges(Edge::LOOK look) { for (auto& childEdge : childEdges) { IE_ASSERT(childEdge->getStatus() == Edge::Status::NotAllocated) << " Unexpected inplace resolve call to an allocated edge: " << childEdge->name(); - auto newMem = std::make_shared(getEngine(), std::unique_ptr(memMngr.get()), selected_pd->getConfig().outConfs[i].getMemDesc()); - childEdge->resetMemoryPtr(newMem); + auto newMem = std::make_shared(getEngine(), selected_pd->getConfig().outConfs[i].getMemDesc(), memMngr); + childEdge->reuse(newMem); } } } diff --git a/src/plugins/intel_cpu/src/nodes/concat.cpp b/src/plugins/intel_cpu/src/nodes/concat.cpp index ea4fa815720a3c..bfb06d1adab95c 100644 --- a/src/plugins/intel_cpu/src/nodes/concat.cpp +++ b/src/plugins/intel_cpu/src/nodes/concat.cpp @@ -640,9 +640,9 @@ void Concat::resolveInPlaceEdges(Edge::LOOK look) { IE_ASSERT(parentEdge->getStatus() == Edge::Status::NotAllocated) << "Unexpected inplace resolve call to an allocated edge: " << parentEdge->name(); auto memMngr = std::make_shared(baseMemMngr, numberOfInputs, i); - auto newMem = std::make_shared(getEngine(), std::unique_ptr(memMngr.get()), selected_pd->getConfig().inConfs[i].getMemDesc()); + auto newMem = std::make_shared(getEngine(), selected_pd->getConfig().inConfs[i].getMemDesc(), memMngr); - parentEdge->resetMemoryPtr(newMem); + parentEdge->reuse(newMem); } } else { Node::resolveInPlaceEdges(look); diff --git a/src/plugins/intel_cpu/src/nodes/conv.cpp b/src/plugins/intel_cpu/src/nodes/conv.cpp index 1079981a452c15..afbd2c5dab1bc9 100644 --- a/src/plugins/intel_cpu/src/nodes/conv.cpp +++ b/src/plugins/intel_cpu/src/nodes/conv.cpp @@ -908,7 +908,7 @@ void Convolution::addZeroPoints(dnnl::primitive_attr& attr) { if (!stockInputZeroPointsMemPtr) { DnnlBlockedMemoryDesc memoryDesc(Precision::I32, {inputZeroPoints.size()}); - stockInputZeroPointsMemPtr.reset(new Memory(getEngine(), memoryDesc, inputZeroPoints.data())); + stockInputZeroPointsMemPtr = std::make_shared(getEngine(), memoryDesc, inputZeroPoints.data()); } } @@ -928,7 +928,7 @@ void Convolution::addLegacyZeroPoints(dnnl::primitive_attr& attr) { if (!legacyWeightsZeroPointsMemPtr) { DnnlBlockedMemoryDesc memoryDesc(Precision::FP32, {legacyWeightsZeroPoints.size()}); - legacyWeightsZeroPointsMemPtr.reset(new Memory(getEngine(), memoryDesc, legacyWeightsZeroPoints.data())); + legacyWeightsZeroPointsMemPtr = std::make_shared(getEngine(), memoryDesc, legacyWeightsZeroPoints.data()); } } @@ -938,7 +938,7 @@ void Convolution::addLegacyZeroPoints(dnnl::primitive_attr& attr) { if (!legacyOutputCompensationMemPtr) { DnnlBlockedMemoryDesc memoryDesc(Precision::I32, {legacyOutputCompensation.size()}); - legacyOutputCompensationMemPtr.reset(new Memory(getEngine(), memoryDesc, legacyOutputCompensation.data())); + legacyOutputCompensationMemPtr = std::make_shared(getEngine(), memoryDesc, legacyOutputCompensation.data()); } } } @@ -1525,7 +1525,8 @@ void Convolution::executeDynamicImpl(dnnl::stream strm) { const size_t sumPortNum = getParentEdges().size() - 1; const auto& sumInpMem = getParentEdgesAtPort(sumPortNum).front()->getMemory(); auto inp1 = subgraph->getInput(1); - // inp1->getChildEdgesAtPort(0).front()->getMemoryPtr()->setDataHandle(sumInpMem.GetData()); + auto inp1Mem = inp1->getChildEdgesAtPort(0).front()->getMemoryPtr(); + 
inp1Mem->getMemoryMngr()->setExtBuff(sumInpMem.GetData(), sumInpMem.GetSize()); subgraph->infer(); diff --git a/src/plugins/intel_cpu/src/nodes/eltwise.cpp b/src/plugins/intel_cpu/src/nodes/eltwise.cpp index 898f1048ecb8c9..93756e50a3b63c 100644 --- a/src/plugins/intel_cpu/src/nodes/eltwise.cpp +++ b/src/plugins/intel_cpu/src/nodes/eltwise.cpp @@ -2418,7 +2418,7 @@ void Eltwise::fuseInto(NodePtr& parentNode) { void Eltwise::appendMemory(const std::vector &data, MemoryPtr &memPtr, std::vector& postOpsMem) { if (!memPtr) { DnnlBlockedMemoryDesc memoryDesc(Precision::FP32, {data.size()}); - memPtr.reset(new Memory(getEngine(), memoryDesc, data.data())); + memPtr = std::make_shared(getEngine(), memoryDesc, data.data()); postOpsMem.push_back(memPtr); } } diff --git a/src/plugins/intel_cpu/src/nodes/fake_quantize.cpp b/src/plugins/intel_cpu/src/nodes/fake_quantize.cpp index b4b16e692f6bb8..b911feaf39c639 100644 --- a/src/plugins/intel_cpu/src/nodes/fake_quantize.cpp +++ b/src/plugins/intel_cpu/src/nodes/fake_quantize.cpp @@ -1837,7 +1837,7 @@ void FakeQuantize::initializePostOpDataLegacy(const VectorDims &dims, const size void FakeQuantize::appendMemory(const size_t dataSize, const void *data, MemoryPtr &memPtr, std::vector& postOpsMem) { if (!memPtr) { DnnlBlockedMemoryDesc memoryDesc(Precision::FP32, {dataSize}); - memPtr.reset(new Memory(getEngine(), memoryDesc, data)); + memPtr = std::make_shared(getEngine(), memoryDesc, data); postOpsMem.push_back(memPtr); } diff --git a/src/plugins/intel_cpu/src/nodes/gather.cpp b/src/plugins/intel_cpu/src/nodes/gather.cpp index 6e7c9e8d615126..42b78630ebedca 100644 --- a/src/plugins/intel_cpu/src/nodes/gather.cpp +++ b/src/plugins/intel_cpu/src/nodes/gather.cpp @@ -607,9 +607,9 @@ void Gather::resolveInPlaceEdges(Edge::LOOK look) { // getName() << " with type " << getTypeStr(); auto memMngr = std::make_shared(baseMemMngr, baseDim, offset); - auto newMem = std::make_shared(getEngine(), std::unique_ptr(memMngr.get()), config.outConfs[outputPort].getMemDesc()); + auto newMem = std::make_shared(getEngine(), config.outConfs[outputPort].getMemDesc(), memMngr); - childEdge->resetMemoryPtr(newMem); + childEdge->reuse(newMem); } } else { Node::resolveInPlaceEdges(look); diff --git a/src/plugins/intel_cpu/src/nodes/input.cpp b/src/plugins/intel_cpu/src/nodes/input.cpp index 560c2d8e44a70f..2a97159aa9c4a0 100644 --- a/src/plugins/intel_cpu/src/nodes/input.cpp +++ b/src/plugins/intel_cpu/src/nodes/input.cpp @@ -273,13 +273,13 @@ void Input::cloneBlobIfRequired() { // but ngraph Constant uses actual bitWidth for data storage allocation // in that case we make a copy to avoid overflow if (constOp->get_byte_size() >= memDesc.getCurrentMemSize()) { - memory = MemoryPtr(new Memory(getEngine(), memDesc, constOp->get_data_ptr())); + memory = std::make_shared(getEngine(), memDesc, constOp->get_data_ptr()); } else { - memory = MemoryPtr(new Memory(getEngine(), memDesc)); + memory = std::make_shared(getEngine(), memDesc); memcpy(memory->GetData(), constOp->get_data_ptr(), constOp->get_byte_size()); } - MemoryPtr ptr = MemoryPtr(new Memory(getEngine(), memDesc)); + MemoryPtr ptr = std::make_shared(getEngine(), memDesc); ptr->SetData(*memory.get(), needFlushDenormalsToZero); return ptr; diff --git a/src/plugins/intel_cpu/src/nodes/reorder.cpp b/src/plugins/intel_cpu/src/nodes/reorder.cpp index ca2ee7fad36b3c..020dc98157a052 100644 --- a/src/plugins/intel_cpu/src/nodes/reorder.cpp +++ b/src/plugins/intel_cpu/src/nodes/reorder.cpp @@ -225,7 +225,7 @@ void 
Reorder::createReorderPrimitive(const dnnl::memory::desc& srcDesc, auto newDesc = dnnl::memory::desc(DnnlExtensionUtils::convertToDnnlDims(newDims), src_blocked->GetDataType(), newFormat); - src_blocked->redefineDesc(DnnlExtensionUtils::makeDescriptor(newDesc), srcPtr, false); + src_blocked = std::make_shared(getEngine(), DnnlExtensionUtils::makeDescriptor(newDesc), srcPtr, false); src_desc = src_blocked->GetPrimitive().get_desc(); } diff --git a/src/plugins/intel_cpu/src/nodes/split.cpp b/src/plugins/intel_cpu/src/nodes/split.cpp index f34ef95b0d89bd..98bbf34bd2c422 100644 --- a/src/plugins/intel_cpu/src/nodes/split.cpp +++ b/src/plugins/intel_cpu/src/nodes/split.cpp @@ -543,9 +543,9 @@ void Split::resolveInPlaceEdges(Edge::LOOK look) { // getName() << " with type " << getTypeStr(); auto memMngr = std::make_shared(baseMemMngr, baseDim, offset, partDim); - auto newMem = std::make_shared(getEngine(), std::unique_ptr(memMngr.get()), selected_pd->getConfig().outConfs[i].getMemDesc()); + auto newMem = std::make_shared(getEngine(), selected_pd->getConfig().outConfs[i].getMemDesc(), memMngr); - childEdge->resetMemoryPtr(newMem); + childEdge->reuse(newMem); } offset += partDim; } diff --git a/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp b/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp index 5075e64ddff46f..a8f17553fc3cfa 100644 --- a/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp +++ b/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp @@ -312,7 +312,8 @@ void DynamicBuffer::move_buffer(const MemoryPtr& new_buffer) { const auto src_offset_in_byte = stride > 0 ? 0 : (src_stride - valid_size); chunk_offset_in_byte = stride > 0 ? 0 : (dst_stride - valid_size); // reset chunk_offset_in_byte - copy(reinterpret_cast(mem_holder_buffer->GetData()) + src_offset_in_byte, reinterpret_cast(new_buffer->GetData()) + chunk_offset_in_byte, + copy(reinterpret_cast(mem_holder_buffer->GetData()) + src_offset_in_byte, + reinterpret_cast(new_buffer->GetData()) + chunk_offset_in_byte, src_stride, dst_stride, count, valid_size); // assign mem_holder_buffer diff --git a/src/plugins/intel_cpu/tests/unit/nodes/reorder_node_test.cpp b/src/plugins/intel_cpu/tests/unit/nodes/reorder_node_test.cpp index acb937acc173f4..adce5d27f5e5c6 100644 --- a/src/plugins/intel_cpu/tests/unit/nodes/reorder_node_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/nodes/reorder_node_test.cpp @@ -133,10 +133,8 @@ class ReorderCPUTestGraph { reorderNode->addEdge(parentEdge); reorderNode->addEdge(childEdge); - auto parentMemory = std::make_shared(cpuEngine); - auto childMemory = std::make_shared(cpuEngine); - parentMemory->Create(inputDesc, nullptr); - childMemory->Create(outputDesc, nullptr); + auto parentMemory = std::make_shared(cpuEngine, inputDesc); + auto childMemory = std::make_shared(cpuEngine, outputDesc); parentEdge->reuse(parentMemory); childEdge->reuse(childMemory); From 2bf5ceea6846a0d4f1695b5d34476d3cd9c6a066 Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Tue, 6 Jun 2023 10:37:11 +0200 Subject: [PATCH 16/28] Prevent input memory modification --- src/plugins/intel_cpu/src/edge.cpp | 42 ++++++++++++++++++ src/plugins/intel_cpu/src/edge.h | 1 + src/plugins/intel_cpu/src/graph.cpp | 47 +-------------------- src/plugins/intel_cpu/src/infer_request.cpp | 31 ++++---------- 4 files changed, 53 insertions(+), 68 deletions(-) diff --git a/src/plugins/intel_cpu/src/edge.cpp b/src/plugins/intel_cpu/src/edge.cpp index 41d8b92f6d896b..aa1d4c8d6068d5 100644 --- a/src/plugins/intel_cpu/src/edge.cpp +++ 
b/src/plugins/intel_cpu/src/edge.cpp @@ -540,5 +540,47 @@ bool Edge::inPlace(LOOK look) const { return false; } +NodePtr Edge::modifiedInPlace() const { + auto childNode = getChild(); + if (childNode && childNode->isInPlace()) { + // check if the children nodes are able to modify the memory + auto childPort = getOutputNum(); + auto inPlaceInputPort = childNode->inPlaceInputPort(childPort); + if (inPlaceInputPort >= 0) { + if (childNode->isExecutable()) { + // Node can modify the memory + return childNode; + } + for (auto&& edge : childNode->getChildEdgesAtPort(inPlaceInputPort)) { + // continue searching + if (auto result = edge->modifiedInPlace()) { + return result; + } + } + } + // check backward dependency + if (auto childSPD = childNode->getSelectedPrimitiveDescriptor()) { + auto& outConfs = childSPD->getConfig().outConfs; + for (size_t i = 0; i < outConfs.size(); ++i) { + const auto& conf = outConfs[i]; + if (childPort >= 0 && conf.inPlace() == childPort) { + if (childNode->isExecutable()) { + // Node can modify the memory + return childNode; + } + for (auto&& edge : childNode->getChildEdgesAtPort(i)) { + // continue searching + if (auto result = edge->modifiedInPlace()) { + return result; + } + } + } + } + } + } + // nothing has been found + return nullptr; +} + } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/edge.h b/src/plugins/intel_cpu/src/edge.h index 0317138bedab71..90afc3eaafbaf4 100644 --- a/src/plugins/intel_cpu/src/edge.h +++ b/src/plugins/intel_cpu/src/edge.h @@ -67,6 +67,7 @@ class Edge { MemoryPtr getMemoryPtr() const; ReorderStatus needReorder(); + NodePtr modifiedInPlace() const; bool isDropped() const; bool isUseExternalMemory() const; diff --git a/src/plugins/intel_cpu/src/graph.cpp b/src/plugins/intel_cpu/src/graph.cpp index a2762768aab536..bb850894e7c81f 100644 --- a/src/plugins/intel_cpu/src/graph.cpp +++ b/src/plugins/intel_cpu/src/graph.cpp @@ -635,54 +635,11 @@ void Graph::InitEdges() { } // secondary pass to eliminate complex implace conflicts - std::function findNodeModifyingMemory; - findNodeModifyingMemory = [&findNodeModifyingMemory](const EdgePtr& edge) -> NodePtr { - auto childNode = edge->getChild(); - if (childNode && childNode->isInPlace()) { - // check if the children nodes are able to modify the memory - auto childPort = edge->getOutputNum(); - auto inPlaceInputPort = childNode->inPlaceInputPort(childPort); - if (inPlaceInputPort >= 0) { - if (childNode->isExecutable()) { - // Node can modify the memory - return childNode; - } - for (auto&& edge : childNode->getChildEdgesAtPort(inPlaceInputPort)) { - // continue searching - if (auto result = findNodeModifyingMemory(edge)) { - return result; - } - } - } - // check backward dependency - if (auto childSPD = childNode->getSelectedPrimitiveDescriptor()) { - auto& outConfs = childSPD->getConfig().outConfs; - for (size_t i = 0; i < outConfs.size(); ++i) { - const auto& conf = outConfs[i]; - if (childPort >= 0 && conf.inPlace() == childPort) { - if (childNode->isExecutable()) { - // Node can modify the memory - return childNode; - } - for (auto&& edge : childNode->getChildEdgesAtPort(i)) { - // continue searching - if (auto result = findNodeModifyingMemory(edge)) { - return result; - } - } - } - } - } - } - // nothing has been found - return nullptr; - }; - - auto needReorder = [&findNodeModifyingMemory](const EdgePtr& edge) -> bool { + auto needReorder = [](const EdgePtr& edge) -> bool { int inNumber = edge->getInputNum(); const auto portChildEdges = 
edge->getParent()->getChildEdgesAtPort(inNumber); if (portChildEdges.size() > 1) { - if (auto modifyingNode = findNodeModifyingMemory(edge)) { + if (auto modifyingNode = edge->modifiedInPlace()) { auto execIndex = modifyingNode->getExecIndex(); for (auto pEdgePeer : portChildEdges) { if (pEdgePeer == edge) diff --git a/src/plugins/intel_cpu/src/infer_request.cpp b/src/plugins/intel_cpu/src/infer_request.cpp index e1cae6347e1f32..6f19ff15837a4a 100644 --- a/src/plugins/intel_cpu/src/infer_request.cpp +++ b/src/plugins/intel_cpu/src/infer_request.cpp @@ -210,7 +210,7 @@ void InferRequestBase::changeDefaultPtr() { if (inputNodePtr->getChildEdgeAt(0)->getMemory().GetData() == static_cast(it.second->buffer())) continue; auto& childEdges = inputNodePtr->getChildEdges(); - // Input cannot be in-place with other primitives + // Perform checks that the user's memory will not be modified bool canBeInPlace = true; for (auto& childEdge : childEdges) { auto ce = childEdge.lock(); @@ -224,36 +224,22 @@ void InferRequestBase::changeDefaultPtr() { break; } - if (child->getType() == Type::Concatenation && child->isInPlace()) { + // the input memory should be referenced by the children, otherwise it should be written to a + // specific location + if (ce->inPlace(Edge::LOOK_DOWN)) { canBeInPlace = false; break; } - // // Cannot be in-place before split because split is using different ptrs without offsets - // if (child->getType() == Type::Split) { - // canBeInPlace = false; - // break; - // } - - if (child->isInPlace() && child->getType() != Type::Split) { + if (auto result = ce->modifiedInPlace()) { canBeInPlace = false; break; } - // auto& edges = child->getChildEdges(); - // for (auto& edge : edges) { - // auto e = edge.lock(); - // if (!e) - // IE_THROW() << "Node " << child->getName() << " contains empty child edge"; - - // if (e->getMemory().GetData() == ce->getMemory().GetData()) { - // canBeInPlace = false; - // break; - // } - // } - - if (!canBeInPlace) + if (child->getType() == Type::Concatenation && child->isInPlace()) { + canBeInPlace = false; break; + } } if (canBeInPlace) { for (auto& edge : childEdges) { @@ -264,7 +250,6 @@ void InferRequestBase::changeDefaultPtr() { changeEdgePtr(e, it.second); } } - continue; } From c00d8a052e267b0ce1e6a355a794d275b930b690 Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Tue, 6 Jun 2023 15:28:32 +0200 Subject: [PATCH 17/28] Minor build fixes --- src/plugins/intel_cpu/src/cpu_memory.cpp | 6 +++--- src/plugins/intel_cpu/src/edge.h | 2 +- src/plugins/intel_cpu/src/infer_request.cpp | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/plugins/intel_cpu/src/cpu_memory.cpp b/src/plugins/intel_cpu/src/cpu_memory.cpp index 46e5b4fd88567e..64f7f598a2044c 100644 --- a/src/plugins/intel_cpu/src/cpu_memory.cpp +++ b/src/plugins/intel_cpu/src/cpu_memory.cpp @@ -35,10 +35,10 @@ namespace { } // namespace Memory::Memory(const dnnl::engine& eng, MemoryDescPtr desc, const void* data, bool pads_zeroing) : + pMemDesc(desc), eng(eng), mgrHandle(std::make_shared(std::unique_ptr(new MemoryMngrWithReuse())), this), - dnnlMemHandle(this), - pMemDesc(desc) { + dnnlMemHandle(this) { Create(pMemDesc, data, pads_zeroing); } @@ -46,7 +46,7 @@ Memory::Memory(const dnnl::engine& eng, const MemoryDesc& desc, const void* data Memory::Memory(eng, desc.clone(), data, pads_zeroing) {} Memory::Memory(const dnnl::engine& eng, MemoryDescPtr desc, MemoryMngrPtr mngr) : - eng(eng), pMemDesc(desc), mgrHandle(mngr, this), dnnlMemHandle(this) { + pMemDesc(desc), eng(eng), 
mgrHandle(mngr, this), dnnlMemHandle(this) { bool memAllocated = mgrHandle->getRawPtr(); Create(desc, nullptr, !memAllocated); diff --git a/src/plugins/intel_cpu/src/edge.h b/src/plugins/intel_cpu/src/edge.h index 90afc3eaafbaf4..aa934379cf9343 100644 --- a/src/plugins/intel_cpu/src/edge.h +++ b/src/plugins/intel_cpu/src/edge.h @@ -67,7 +67,7 @@ class Edge { MemoryPtr getMemoryPtr() const; ReorderStatus needReorder(); - NodePtr modifiedInPlace() const; + std::shared_ptr modifiedInPlace() const; bool isDropped() const; bool isUseExternalMemory() const; diff --git a/src/plugins/intel_cpu/src/infer_request.cpp b/src/plugins/intel_cpu/src/infer_request.cpp index 6f19ff15837a4a..44b9c3ae2d0ed4 100644 --- a/src/plugins/intel_cpu/src/infer_request.cpp +++ b/src/plugins/intel_cpu/src/infer_request.cpp @@ -225,7 +225,7 @@ void InferRequestBase::changeDefaultPtr() { } // the input memory should be referenced by the children, otherwise it should be written to a - // specific location + // specific location if (ce->inPlace(Edge::LOOK_DOWN)) { canBeInPlace = false; break; From 01c24cda2333dfb3546c8a48966b713b3e0d2aab Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Tue, 6 Jun 2023 16:15:39 +0200 Subject: [PATCH 18/28] Fix unittest build --- src/plugins/intel_cpu/tests/unit/dnnl_memory_test.cpp | 9 +++------ .../intel_cpu/tests/unit/nodes/reorder_node_test.cpp | 6 +++--- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/src/plugins/intel_cpu/tests/unit/dnnl_memory_test.cpp b/src/plugins/intel_cpu/tests/unit/dnnl_memory_test.cpp index 26ed9672a70ebf..b5dc44776966d0 100644 --- a/src/plugins/intel_cpu/tests/unit/dnnl_memory_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/dnnl_memory_test.cpp @@ -26,8 +26,7 @@ TEST(MemoryTest, ConcurrentGetPrimitive) { dnnl::memory dnnl_mem1; dnnl::memory dnnl_mem2; auto desc = std::make_shared(Precision::FP32, Shape{10, 2}); - Memory cpu_mem1(eng); - cpu_mem1.Create(desc); + Memory cpu_mem1(eng, desc); std::atomic lock{true}; @@ -55,10 +54,8 @@ TEST(MemoryTest, ConcurrentResizeGetPrimitive) { for (size_t i = 0; i < number_of_attempts; ++i) { dnnl::memory dnnl_mem; auto desc = std::make_shared(Precision::FP32, Shape{10, 2}); - Memory cpu_mem1(eng); - cpu_mem1.Create(desc); - Memory cpu_mem2(eng); - cpu_mem2.Create(desc, cpu_mem1.getMemoryMngr()); // tie two memory objects (memory reuse) + Memory cpu_mem1(eng, desc); + Memory cpu_mem2(eng, desc, cpu_mem1.getMemoryMngr()); auto desc2 = std::make_shared(Precision::FP32, Shape{10, 20}); std::atomic lock{true}; diff --git a/src/plugins/intel_cpu/tests/unit/nodes/reorder_node_test.cpp b/src/plugins/intel_cpu/tests/unit/nodes/reorder_node_test.cpp index adce5d27f5e5c6..e47adbe9c2acbd 100644 --- a/src/plugins/intel_cpu/tests/unit/nodes/reorder_node_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/nodes/reorder_node_test.cpp @@ -22,8 +22,8 @@ using namespace InferenceEngine; using namespace ov::intel_cpu; namespace ReorderCPUTest { -inline void checkReorder(const ov::intel_cpu::Memory& inputMemory, - const ov::intel_cpu::Memory& outputMemory, +inline void checkReorder(const ov::intel_cpu::IMemory& inputMemory, + const ov::intel_cpu::IMemory& outputMemory, const InferenceEngine::Precision& prescision) { auto srcData = inputMemory.GetData(); auto dstData = outputMemory.GetData(); @@ -68,7 +68,7 @@ inline std::string layoutName(const LayoutType& layout) { return "Unsupported layout type"; } -inline void fillData(const ov::intel_cpu::Memory& inputMemory, const InferenceEngine::Precision& prec) { +inline void fillData(const 
ov::intel_cpu::IMemory& inputMemory, const InferenceEngine::Precision& prec) { ov::intel_cpu::DnnlMemoryDescPtr dnnlMdInput = inputMemory.GetDescWithType(); const dnnl::impl::memory_desc_wrapper mdInput{dnnlMdInput->getDnnlDesc().get()}; auto elemNum = mdInput.nelems(); From 619045a411e98009e5d23c41921bbbb4791ddeb1 Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Tue, 6 Jun 2023 17:44:01 +0200 Subject: [PATCH 19/28] Fix for variadic concat --- src/plugins/intel_cpu/src/nodes/concat.cpp | 9 ++++++++- src/plugins/intel_cpu/src/nodes/split.cpp | 4 ++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/plugins/intel_cpu/src/nodes/concat.cpp b/src/plugins/intel_cpu/src/nodes/concat.cpp index bfb06d1adab95c..cade97530cc5b4 100644 --- a/src/plugins/intel_cpu/src/nodes/concat.cpp +++ b/src/plugins/intel_cpu/src/nodes/concat.cpp @@ -633,16 +633,23 @@ void Concat::resolveInPlaceEdges(Edge::LOOK look) { auto& config = selected_pd->getConfig(); size_t numberOfInputs = config.inConfs.size(); size_t inplaceOutIndx = selected_pd->getConfig().inConfs[0].inPlace(); + auto baseDim = outputShapes.front().getDims()[axis]; + IE_ASSERT(baseDim != Shape::UNDEFINED_DIM) << "Concat node: " << getName() << " can't use inPlace memory with concatenation on dynamic dimension"; auto baseMemMngr = getChildEdgesAtPort(inplaceOutIndx).front()->getMemory().getMemoryMngr(); + ptrdiff_t offset = 0; for (size_t i = 0; i < numberOfInputs; ++i) { + auto partDim = inputShapes[i].getDims()[axis]; + IE_ASSERT(partDim != Shape::UNDEFINED_DIM) << "Concat node: " << getName() << " can't use inPlace memory with concatenation on dynamic dimension"; + auto parentEdge = getParentEdgeAt(i); IE_ASSERT(parentEdge->getStatus() == Edge::Status::NotAllocated) << "Unexpected inplace resolve call to an allocated edge: " << parentEdge->name(); - auto memMngr = std::make_shared(baseMemMngr, numberOfInputs, i); + auto memMngr = std::make_shared(baseMemMngr, baseDim, offset, partDim); auto newMem = std::make_shared(getEngine(), selected_pd->getConfig().inConfs[i].getMemDesc(), memMngr); parentEdge->reuse(newMem); + offset += partDim; } } else { Node::resolveInPlaceEdges(look); diff --git a/src/plugins/intel_cpu/src/nodes/split.cpp b/src/plugins/intel_cpu/src/nodes/split.cpp index 98bbf34bd2c422..2e186309d50806 100644 --- a/src/plugins/intel_cpu/src/nodes/split.cpp +++ b/src/plugins/intel_cpu/src/nodes/split.cpp @@ -531,12 +531,12 @@ void Split::resolveInPlaceEdges(Edge::LOOK look) { size_t numberOfOutputs = config.outConfs.size(); size_t inplaceInpIndx = selected_pd->getConfig().outConfs[0].inPlace(); auto baseDim = inputShapes.front().getDims()[axis]; - IE_ASSERT(baseDim != Shape::UNDEFINED_DIM) << "Split node: " << getName() << " can not use inPlace memory with splitting on dynamic dimention"; + IE_ASSERT(baseDim != Shape::UNDEFINED_DIM) << "Split node: " << getName() << " can not use inPlace memory with splitting on dynamic dimension"; auto baseMemMngr = getParentEdgesAtPort(inplaceInpIndx).front()->getMemory().getMemoryMngr(); ptrdiff_t offset = 0; for (size_t i = 0; i < numberOfOutputs; ++i) { auto partDim = outputShapes[i].getDims()[axis]; - IE_ASSERT(partDim != Shape::UNDEFINED_DIM) << "Split node: " << getName() << " can not use inPlace memory with splitting on dynamic dimention"; + IE_ASSERT(partDim != Shape::UNDEFINED_DIM) << "Split node: " << getName() << " can not use inPlace memory with splitting on dynamic dimension"; const auto& childEdges = getChildEdgesAtPort(i); for (auto& childEdge : childEdges) { // 
IE_ASSERT(parentEdge->getStatus() == Edge::Status::NotAllocated) << "Unexpected edge status in node: " << From 887df92e075da502ce11d9f296eacfefba640dbd Mon Sep 17 00:00:00 2001 From: jialipen Date: Fri, 9 Jun 2023 00:34:17 +0800 Subject: [PATCH 20/28] initiate zero-copy output. --- src/plugins/intel_cpu/src/cpu_memory.cpp | 8 +++ src/plugins/intel_cpu/src/cpu_memory.h | 14 ++-- src/plugins/intel_cpu/src/graph.cpp | 27 ++++++- src/plugins/intel_cpu/src/infer_request.cpp | 30 ++++++-- src/plugins/intel_cpu/src/infer_request.h | 4 ++ src/plugins/intel_cpu/src/output_mem_mgr.cpp | 71 +++++++++++++++++++ src/plugins/intel_cpu/src/output_mem_mgr.h | 58 +++++++++++++++ .../intel_cpu/src/partitioned_mem_mgr.cpp | 8 ++- 8 files changed, 206 insertions(+), 14 deletions(-) create mode 100644 src/plugins/intel_cpu/src/output_mem_mgr.cpp create mode 100644 src/plugins/intel_cpu/src/output_mem_mgr.h diff --git a/src/plugins/intel_cpu/src/cpu_memory.cpp b/src/plugins/intel_cpu/src/cpu_memory.cpp index 64f7f598a2044c..2d57ab5d9b174a 100644 --- a/src/plugins/intel_cpu/src/cpu_memory.cpp +++ b/src/plugins/intel_cpu/src/cpu_memory.cpp @@ -17,6 +17,7 @@ #include "memory_desc/dnnl_blocked_memory_desc.h" #include "nodes/reorder.h" #include "memory_desc/cpu_memory_desc.h" +#include "output_mem_mgr.h" using namespace InferenceEngine; using namespace dnnl; @@ -115,6 +116,13 @@ void Memory::redefineDesc(MemoryDescPtr desc) { IE_THROW() << "Can not reset descriptor, memory upper bound is unknown."; } + // TODO: how elegantly + const auto memMngr = getMemoryMngr(); + auto outMemMngr = std::dynamic_pointer_cast(memMngr); + if (outMemMngr != nullptr) { + outMemMngr->setMemDesc(desc); + } + this->Create(desc, nullptr, false); } diff --git a/src/plugins/intel_cpu/src/cpu_memory.h b/src/plugins/intel_cpu/src/cpu_memory.h index 92d839a5f0593f..b326792a45f416 100644 --- a/src/plugins/intel_cpu/src/cpu_memory.h +++ b/src/plugins/intel_cpu/src/cpu_memory.h @@ -116,14 +116,15 @@ class DnnlMemoryMngr : public IMemoryMngrObserver { std::unique_ptr _pMemMngr; }; -using MemoryMngrPtr = std::shared_ptr; -using MemoryMngrCPtr = std::shared_ptr; +using MemoryMngrPtr = std::shared_ptr; +using MemoryMngrCPtr = std::shared_ptr; class DnnlMemMngrHandle { public: DnnlMemMngrHandle(MemoryMngrPtr pMgr, Memory* pMem) : _pMgr(pMgr), _pMem(pMem) { - if (_pMgr) { - _pMgr->registerMemory(_pMem); + auto pMgrObs = std::dynamic_pointer_cast(pMgr); + if (pMgrObs) { + pMgrObs->registerMemory(_pMem); } } @@ -141,8 +142,9 @@ class DnnlMemMngrHandle { } ~DnnlMemMngrHandle() { - if (_pMgr) { - _pMgr->unregisterMemory(_pMem); + auto pMgrObs = std::dynamic_pointer_cast(_pMgr); + if (pMgrObs) { + pMgrObs->unregisterMemory(_pMem); } } diff --git a/src/plugins/intel_cpu/src/graph.cpp b/src/plugins/intel_cpu/src/graph.cpp index bb850894e7c81f..e097f0cd4f5d94 100644 --- a/src/plugins/intel_cpu/src/graph.cpp +++ b/src/plugins/intel_cpu/src/graph.cpp @@ -56,6 +56,8 @@ # include #endif +#include "output_mem_mgr.h" + using namespace dnnl; using namespace InferenceEngine; using namespace InferenceEngine::details; @@ -883,9 +885,30 @@ void Graph::AllocateWithReuse() { } } for (auto& group : groups) { - auto grpMemMngr = - std::make_shared(std::unique_ptr(new MemoryMngrWithReuse())); + MemoryMngrPtr grpMemMngr; + // determine a group with outputs.
+ size_t isOutGrp = 0; for (auto& box : group) { + for (auto& edge : edge_clusters[box.id]) { + if (edge->getChild()->getType() == Type::Output) { + isOutGrp++; + break; + } + } + } + if (isOutGrp) { + IE_ASSERT(isOutGrp==1); // reuse_io_tensors false + grpMemMngr = + std::make_shared(std::unique_ptr(new MemoryMngrWithReuse())); + } else { + grpMemMngr = + std::make_shared(std::unique_ptr(new MemoryMngrWithReuse())); + } + for (auto& box : group) { + bool isOutput = false; + for (auto& edge : edge_clusters[box.id]) { + isOutput |= edge->getChild()->getType() == Type::Output; + } for (auto& edge : edge_clusters[box.id]) { if (edge->getStatus() == Edge::Status::NeedAllocation) { edge->allocate(grpMemMngr); diff --git a/src/plugins/intel_cpu/src/infer_request.cpp b/src/plugins/intel_cpu/src/infer_request.cpp index 44b9c3ae2d0ed4..0fe257d303109c 100644 --- a/src/plugins/intel_cpu/src/infer_request.cpp +++ b/src/plugins/intel_cpu/src/infer_request.cpp @@ -257,6 +257,21 @@ void InferRequestBase::changeDefaultPtr() { auto output = outputNodesMap.find(it.first); if (output != outputNodesMap.end()) { auto parentEdge = output->second->getParentEdgeAt(0); + + if (graph->hasDynamicInput()) { // TODO: internal dynamism + bool canBeInPlace = true; + // TODO: filter + + if (canBeInPlace) { + changeEdgePtr(parentEdge, it.second); + outputMemMngrs[it.first]->setAllocator(outputAllocators[it.first]); + } else { + outputMemMngrs[it.first]->setAllocator(nullptr); + } + + continue; + } + if (parentEdge->getMemory().GetData() == static_cast(it.second->buffer())) continue; @@ -615,7 +630,7 @@ InferRequest::InferRequest(const std::vector>& i modelInputsMap[ov::op::util::get_ie_output_name(ngraph::Output(in))] = in; } for (const std::shared_ptr& out : outputs) { - modelOutputsMap[ov::op::util::get_ie_output_name(out->input_value(0))] = out; + modelOutputsMap[ov::op::util::get_ie_output_name(out->input_value(0))] = out; } CreateInferRequest(); @@ -626,7 +641,14 @@ void InferRequest::initBlobs() { InferRequest::GetBlob(it.first); } for (const auto& it : modelOutputsMap) { - InferRequest::GetBlob(it.first); + auto outblob = InferRequest::GetBlob(it.first); + + outputAllocators[it.first] = std::make_shared(outblob); + + const auto parent_mem = graph->getOutputNodeByName(it.first)->getParentEdgesAtPort(0)[0]->getMemoryPtr(); + const auto memMngr = parent_mem->getMemoryMngr(); + IE_ASSERT(memMngr); + outputMemMngrs[it.first] = std::dynamic_pointer_cast(memMngr); } } @@ -830,8 +852,8 @@ InferenceEngine::Blob::Ptr InferRequest::GetBlob(const std::string& name) { } _outputs[name] = data; - if (!isDynamic && !externalPtr.count(name) && - data->getTensorDesc() == MemoryDescUtils::convertToTensorDesc(output->second->getParentEdgesAtPort(0)[0]->getMemory().getDesc()) && + if (!externalPtr.count(name) && + ((!isDynamic && data->getTensorDesc() == MemoryDescUtils::convertToTensorDesc(output->second->getParentEdgesAtPort(0)[0]->getMemory().getDesc())) || isDynamic) && !graph->getConfig().batchLimit) { externalPtr[name] = data; } diff --git a/src/plugins/intel_cpu/src/infer_request.h b/src/plugins/intel_cpu/src/infer_request.h index b1e98f97152752..3c93d743913829 100644 --- a/src/plugins/intel_cpu/src/infer_request.h +++ b/src/plugins/intel_cpu/src/infer_request.h @@ -9,6 +9,7 @@ #include #include #include +#include "output_mem_mgr.h" namespace ov { namespace intel_cpu { @@ -70,6 +71,9 @@ class InferRequestBase : public InferenceEngine::IInferRequestInternal { protected: virtual void changeDefaultPtr(); + + std::unordered_map 
outputAllocators; + std::unordered_map outputMemMngrs; // reference to the memmanager }; class LegacyInferRequest : public InferRequestBase { diff --git a/src/plugins/intel_cpu/src/output_mem_mgr.cpp b/src/plugins/intel_cpu/src/output_mem_mgr.cpp new file mode 100644 index 00000000000000..c7cdf288846797 --- /dev/null +++ b/src/plugins/intel_cpu/src/output_mem_mgr.cpp @@ -0,0 +1,71 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "output_mem_mgr.h" + +using namespace ov::intel_cpu; + +void* OutputMemoryMngr::getRawPtr() const noexcept { + if (m_allocator) { + return _data; + } else { + return _pMemMngr->getRawPtr(); + } +} + +void OutputMemoryMngr::setExtBuff(void* ptr, size_t size) { + if (m_allocator) { + return; + } else { + return _pMemMngr->setExtBuff(ptr, size); + } +} + +bool OutputMemoryMngr::resize(size_t size) { + if (m_allocator) { + constexpr int cacheLineSize = 64; + bool sizeChanged = false; + if (size > _memUpperBound) { + m_allocator->setMemDesc(m_memDesc); + _data = m_allocator->allocate(size, cacheLineSize); + _memUpperBound = size; + sizeChanged = true; + } + return sizeChanged; + } else { + return _pMemMngr->resize(size); + } +} + +bool OutputMemoryMngr::hasExtBuffer() const noexcept { + return true; +} + +void OutputMemoryMngr::setMemDesc(MemoryDescPtr desc) { + m_memDesc = desc; + return; +} + +void* OutputAllocator::allocate(const size_t bytes, const size_t alignment) { + (void)alignment; + const auto actualDesc = MemoryDescUtils::convertToTensorDesc(*m_memDesc.get()); + IE_ASSERT(m_memDesc->getCurrentMemSize()==bytes); + + auto ¤tDesc = m_blob->getTensorDesc(); + const auto outDims = actualDesc.getDims(); + if (currentDesc.getDims() != outDims) { + // WA: because input/output info initially contains non empty dims, order etc. 
+ // and setDims (called inside setShape) can't correctly modify blocked desc for desc with blocked layout + if (currentDesc.getLayout() == InferenceEngine::Layout::BLOCKED) { + currentDesc = InferenceEngine::TensorDesc(currentDesc.getPrecision(), currentDesc.getLayout()); + } + m_blob->setShape(outDims); + } + return m_blob->buffer(); +} + +void OutputAllocator::setMemDesc(MemoryDescPtr desc) { + m_memDesc = desc; + return; +} \ No newline at end of file diff --git a/src/plugins/intel_cpu/src/output_mem_mgr.h b/src/plugins/intel_cpu/src/output_mem_mgr.h new file mode 100644 index 00000000000000..a7786b11565502 --- /dev/null +++ b/src/plugins/intel_cpu/src/output_mem_mgr.h @@ -0,0 +1,58 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "cpu_memory.h" +#include "openvino/runtime/allocator.hpp" +#include "memory_desc/cpu_memory_desc.h" + +namespace ov { +namespace intel_cpu { + +class OutputAllocator : public Allocator { +public: + OutputAllocator(InferenceEngine::Blob::Ptr blob) : m_blob{blob} {} + + void* allocate(const size_t bytes, const size_t alignment = alignof(max_align_t)); + void setMemDesc(MemoryDescPtr desc); +private: + InferenceEngine::Blob::Ptr m_blob; + MemoryDescPtr m_memDesc; +}; +using OutputAllocatorPtr = std::shared_ptr; +using OutputAllocatorCPtr = std::shared_ptr; + + +class OutputMemoryMngr : public IMemoryMngr { +public: + explicit OutputMemoryMngr(std::unique_ptr mngr) : _pMemMngr(std::move(mngr)) {} + // OutputMemoryMngr(OutputAllocatorPtr allocator) : m_allocator{allocator} {} + + ~OutputMemoryMngr() { + // m_allocator.deallocate(m_ptr, get_byte_size()); + } + + void* getRawPtr() const noexcept override; + void setExtBuff(void* ptr, size_t size) override; + bool resize(size_t size) override; + bool hasExtBuffer() const noexcept override; + + void setAllocator(OutputAllocatorPtr allocator) { m_allocator = allocator; } + void setMemDesc(MemoryDescPtr desc); + +private: + OutputAllocatorPtr m_allocator; + MemoryDescPtr m_memDesc; + size_t _memUpperBound = 0ul; + void* _data; + // We need the default MemMngr as we may fall back to copying the output... and + // we have no idea of this in early stages of graph memory allocation.
+ std::unique_ptr _pMemMngr; +}; +using OutputMemoryMngrPtr = std::shared_ptr; +using OutputMemoryMngrCPtr = std::shared_ptr; + +} // namespace intel_cpu +} // namespace ov \ No newline at end of file diff --git a/src/plugins/intel_cpu/src/partitioned_mem_mgr.cpp b/src/plugins/intel_cpu/src/partitioned_mem_mgr.cpp index a712cbcd2749bd..1a40422fb4ef54 100644 --- a/src/plugins/intel_cpu/src/partitioned_mem_mgr.cpp +++ b/src/plugins/intel_cpu/src/partitioned_mem_mgr.cpp @@ -24,10 +24,14 @@ bool PartitionedMemoryMngr::hasExtBuffer() const noexcept { } void PartitionedMemoryMngr::registerMemory(Memory* memPtr) { - m_pMngr->registerMemory(memPtr); + auto pMgrObs = std::dynamic_pointer_cast(m_pMngr); + if (pMgrObs) + pMgrObs->registerMemory(memPtr); } void PartitionedMemoryMngr::unregisterMemory(Memory* memPtr) { - m_pMngr->unregisterMemory(memPtr); + auto pMgrObs = std::dynamic_pointer_cast(m_pMngr); + if (pMgrObs) + pMgrObs->unregisterMemory(memPtr); } From 05e33e00791a7e0ba1752a7ebd7a7f2490d76ed3 Mon Sep 17 00:00:00 2001 From: jialipen Date: Fri, 9 Jun 2023 17:24:58 +0800 Subject: [PATCH 21/28] fix --- src/plugins/intel_cpu/src/graph.cpp | 9 +++------ src/plugins/intel_cpu/src/infer_request.cpp | 2 ++ 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/plugins/intel_cpu/src/graph.cpp b/src/plugins/intel_cpu/src/graph.cpp index e097f0cd4f5d94..f29b05e705d49e 100644 --- a/src/plugins/intel_cpu/src/graph.cpp +++ b/src/plugins/intel_cpu/src/graph.cpp @@ -899,16 +899,13 @@ void Graph::AllocateWithReuse() { if (isOutGrp) { IE_ASSERT(isOutGrp==1); // reuse_io_tensors false grpMemMngr = - std::make_shared(std::unique_ptr(new MemoryMngrWithReuse())); + std::make_shared(std::unique_ptr(new MemoryMngrWithReuse())); } else { grpMemMngr = - std::make_shared(std::unique_ptr(new MemoryMngrWithReuse())); + std::make_shared(std::unique_ptr(new MemoryMngrWithReuse())); } + std::cout << "grpMemMngr" << grpMemMngr << "" << std::dynamic_pointer_cast(grpMemMngr) <<"" << std::endl; for (auto& box : group) { - bool isOutput = false; - for (auto& edge : edge_clusters[box.id]) { - isOutput |= edge->getChild()->getType() == Type::Output; - } for (auto& edge : edge_clusters[box.id]) { if (edge->getStatus() == Edge::Status::NeedAllocation) { edge->allocate(grpMemMngr); diff --git a/src/plugins/intel_cpu/src/infer_request.cpp b/src/plugins/intel_cpu/src/infer_request.cpp index 0fe257d303109c..fc8e5182e3fee7 100644 --- a/src/plugins/intel_cpu/src/infer_request.cpp +++ b/src/plugins/intel_cpu/src/infer_request.cpp @@ -649,6 +649,8 @@ void InferRequest::initBlobs() { const auto memMngr = parent_mem->getMemoryMngr(); IE_ASSERT(memMngr); outputMemMngrs[it.first] = std::dynamic_pointer_cast(memMngr); + std::cout << "memMngr" << memMngr << std::endl; + IE_ASSERT(outputMemMngrs[it.first]); } } From 8965eb908836a510ec323d1f36fb85bf616df51e Mon Sep 17 00:00:00 2001 From: jialipen Date: Fri, 9 Jun 2023 18:05:49 +0800 Subject: [PATCH 22/28] fix --- src/plugins/intel_cpu/src/infer_request.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/plugins/intel_cpu/src/infer_request.cpp b/src/plugins/intel_cpu/src/infer_request.cpp index fc8e5182e3fee7..ad93ed1372a43e 100644 --- a/src/plugins/intel_cpu/src/infer_request.cpp +++ b/src/plugins/intel_cpu/src/infer_request.cpp @@ -261,7 +261,8 @@ void InferRequestBase::changeDefaultPtr() { if (graph->hasDynamicInput()) { // TODO: internal dynamism bool canBeInPlace = true; // TODO: filter - + IE_ASSERT(outputMemMngrs[it.first]); + 
IE_ASSERT(outputAllocators[it.first]); if (canBeInPlace) { changeEdgePtr(parentEdge, it.second); outputMemMngrs[it.first]->setAllocator(outputAllocators[it.first]); @@ -649,8 +650,6 @@ void InferRequest::initBlobs() { const auto memMngr = parent_mem->getMemoryMngr(); IE_ASSERT(memMngr); outputMemMngrs[it.first] = std::dynamic_pointer_cast(memMngr); - std::cout << "memMngr" << memMngr << std::endl; - IE_ASSERT(outputMemMngrs[it.first]); } } From 6ea25c0917f77f1733f7974e64eaefdf6abeb9e3 Mon Sep 17 00:00:00 2001 From: ceciliapeng Date: Fri, 9 Jun 2023 01:29:18 -0700 Subject: [PATCH 23/28] fix --- src/plugins/intel_cpu/src/infer_request.cpp | 12 +++++++++++- src/plugins/intel_cpu/src/partitioned_mem_mgr.cpp | 9 +++++++++ src/plugins/intel_cpu/src/partitioned_mem_mgr.h | 3 +++ 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/src/plugins/intel_cpu/src/infer_request.cpp b/src/plugins/intel_cpu/src/infer_request.cpp index ad93ed1372a43e..d9faae4887bc7a 100644 --- a/src/plugins/intel_cpu/src/infer_request.cpp +++ b/src/plugins/intel_cpu/src/infer_request.cpp @@ -25,6 +25,7 @@ #include "memory_desc/dnnl_blocked_memory_desc.h" #include #include +#include "partitioned_mem_mgr.h" namespace ov { namespace intel_cpu { @@ -649,7 +650,16 @@ void InferRequest::initBlobs() { const auto parent_mem = graph->getOutputNodeByName(it.first)->getParentEdgesAtPort(0)[0]->getMemoryPtr(); const auto memMngr = parent_mem->getMemoryMngr(); IE_ASSERT(memMngr); - outputMemMngrs[it.first] = std::dynamic_pointer_cast(memMngr); + + OutputMemoryMngrPtr outMemMngr; + outMemMngr = std::dynamic_pointer_cast(memMngr); + if (!outMemMngr) { + auto partiMemMngr = std::dynamic_pointer_cast(memMngr); + if (partiMemMngr) { + outMemMngr = std::dynamic_pointer_cast(partiMemMngr->getBaseMemMngr()); + } + } + outputMemMngrs[it.first] = outMemMngr; } } diff --git a/src/plugins/intel_cpu/src/partitioned_mem_mgr.cpp b/src/plugins/intel_cpu/src/partitioned_mem_mgr.cpp index 1a40422fb4ef54..b19b1e6d77f923 100644 --- a/src/plugins/intel_cpu/src/partitioned_mem_mgr.cpp +++ b/src/plugins/intel_cpu/src/partitioned_mem_mgr.cpp @@ -35,3 +35,12 @@ void PartitionedMemoryMngr::unregisterMemory(Memory* memPtr) { pMgrObs->unregisterMemory(memPtr); } +MemoryMngrPtr PartitionedMemoryMngr::getBaseMemMngr() const noexcept { + const auto pMngr = std::dynamic_pointer_cast(m_pMngr); + std::cout << "iteratively getBaseMemMngr()" << this << std::endl; + if (pMngr) + return pMngr->getBaseMemMngr(); + else + return m_pMngr; +} + diff --git a/src/plugins/intel_cpu/src/partitioned_mem_mgr.h b/src/plugins/intel_cpu/src/partitioned_mem_mgr.h index 94f5f9288d27f8..971f29360525fe 100644 --- a/src/plugins/intel_cpu/src/partitioned_mem_mgr.h +++ b/src/plugins/intel_cpu/src/partitioned_mem_mgr.h @@ -13,6 +13,7 @@ class PartitionedMemoryMngr : public IMemoryMngrObserver { public: PartitionedMemoryMngr(MemoryMngrPtr pMngr, size_t total_blocks = 1, ptrdiff_t offset_blocks = 0, size_t size_blocks = 1) : m_pMngr(pMngr), m_total_blocks(total_blocks), m_offset_blocks(offset_blocks), m_size_blocks(size_blocks) { + std::cout << "baseMemMngr " << m_pMngr << std::endl; IE_ASSERT(m_pMngr) << "Memory manager is uninitialized"; } @@ -23,6 +24,8 @@ class PartitionedMemoryMngr : public IMemoryMngrObserver { void registerMemory(Memory* memPtr) override; void unregisterMemory(Memory* memPtr) override; + MemoryMngrPtr getBaseMemMngr() const noexcept; + private: MemoryMngrPtr m_pMngr; size_t m_total_blocks = 1; // size of the parent memory in blocks From 
8ab3f785ee0afcd916266f2431858bbdc4c79f98 Mon Sep 17 00:00:00 2001 From: ceciliapeng Date: Mon, 12 Jun 2023 22:20:21 -0700 Subject: [PATCH 24/28] refactor --- src/plugins/intel_cpu/src/cpu_memory.cpp | 8 ---- src/plugins/intel_cpu/src/cpu_memory.h | 14 +++--- src/plugins/intel_cpu/src/graph.cpp | 33 +++++++++----- src/plugins/intel_cpu/src/graph.h | 2 + src/plugins/intel_cpu/src/infer_request.cpp | 38 ++++++++-------- src/plugins/intel_cpu/src/infer_request.h | 2 +- src/plugins/intel_cpu/src/node.cpp | 4 +- src/plugins/intel_cpu/src/node.h | 2 + src/plugins/intel_cpu/src/output_mem_mgr.cpp | 44 +++++-------------- src/plugins/intel_cpu/src/output_mem_mgr.h | 37 +++++----------- .../intel_cpu/src/partitioned_mem_mgr.cpp | 17 +------ .../intel_cpu/src/partitioned_mem_mgr.h | 3 -- 12 files changed, 78 insertions(+), 126 deletions(-) diff --git a/src/plugins/intel_cpu/src/cpu_memory.cpp b/src/plugins/intel_cpu/src/cpu_memory.cpp index 2d57ab5d9b174a..64f7f598a2044c 100644 --- a/src/plugins/intel_cpu/src/cpu_memory.cpp +++ b/src/plugins/intel_cpu/src/cpu_memory.cpp @@ -17,7 +17,6 @@ #include "memory_desc/dnnl_blocked_memory_desc.h" #include "nodes/reorder.h" #include "memory_desc/cpu_memory_desc.h" -#include "output_mem_mgr.h" using namespace InferenceEngine; using namespace dnnl; @@ -116,13 +115,6 @@ void Memory::redefineDesc(MemoryDescPtr desc) { IE_THROW() << "Can not reset descriptor, memory upper bound is unknown."; } - // TODO: how elegantly - const auto memMngr = getMemoryMngr(); - auto outMemMngr = std::dynamic_pointer_cast(memMngr); - if (outMemMngr != nullptr) { - outMemMngr->setMemDesc(desc); - } - this->Create(desc, nullptr, false); } diff --git a/src/plugins/intel_cpu/src/cpu_memory.h b/src/plugins/intel_cpu/src/cpu_memory.h index b326792a45f416..92d839a5f0593f 100644 --- a/src/plugins/intel_cpu/src/cpu_memory.h +++ b/src/plugins/intel_cpu/src/cpu_memory.h @@ -116,15 +116,14 @@ class DnnlMemoryMngr : public IMemoryMngrObserver { std::unique_ptr _pMemMngr; }; -using MemoryMngrPtr = std::shared_ptr; -using MemoryMngrCPtr = std::shared_ptr; +using MemoryMngrPtr = std::shared_ptr; +using MemoryMngrCPtr = std::shared_ptr; class DnnlMemMngrHandle { public: DnnlMemMngrHandle(MemoryMngrPtr pMgr, Memory* pMem) : _pMgr(pMgr), _pMem(pMem) { - auto pMgrObs = std::dynamic_pointer_cast(pMgr); - if (pMgrObs) { - pMgrObs->registerMemory(_pMem); + if (_pMgr) { + _pMgr->registerMemory(_pMem); } } @@ -142,9 +141,8 @@ class DnnlMemMngrHandle { } ~DnnlMemMngrHandle() { - auto pMgrObs = std::dynamic_pointer_cast(_pMgr); - if (pMgrObs) { - pMgrObs->unregisterMemory(_pMem); + if (_pMgr) { + _pMgr->unregisterMemory(_pMem); } } diff --git a/src/plugins/intel_cpu/src/graph.cpp b/src/plugins/intel_cpu/src/graph.cpp index f29b05e705d49e..b0defc42c7f400 100644 --- a/src/plugins/intel_cpu/src/graph.cpp +++ b/src/plugins/intel_cpu/src/graph.cpp @@ -886,30 +886,43 @@ void Graph::AllocateWithReuse() { } for (auto& group : groups) { MemoryMngrPtr grpMemMngr; + grpMemMngr = + std::make_shared(std::unique_ptr(new MemoryMngrWithReuse())); + // deternmine a group with outputs. 
size_t isOutGrp = 0; + int64_t outBoxId = -1; for (auto& box : group) { - for (auto& edge : edge_clusters[box.id]) { - if (edge->getChild()->getType() == Type::Output) { + if (std::any_of( + edge_clusters[box.id].begin(), + edge_clusters[box.id].end(), + [box](const ov::intel_cpu::EdgePtr edge) { + return edge->getChild()->getType() == Type::Output; + })) { isOutGrp++; - break; - } + outBoxId = box.id; } } if (isOutGrp) { IE_ASSERT(isOutGrp==1); // reuse_io_tensors false grpMemMngr = - std::make_shared(std::unique_ptr(new MemoryMngrWithReuse())); - } else { - grpMemMngr = - std::make_shared(std::unique_ptr(new MemoryMngrWithReuse())); + std::make_shared(grpMemMngr); + DEBUG_LOG(grpMemMngr); + + // Store the output memory managers. + // So that, the infer requests can be able to get access to them. + for (auto& edge : edge_clusters[outBoxId]) { + if (edge->getChild()->getType() == Type::Output) { + outputNodesMemMngrMap[edge->getParent()->getName()] = grpMemMngr; + } + } } - std::cout << "grpMemMngr" << grpMemMngr << "" << std::dynamic_pointer_cast(grpMemMngr) <<"" << std::endl; for (auto& box : group) { for (auto& edge : edge_clusters[box.id]) { if (edge->getStatus() == Edge::Status::NeedAllocation) { edge->allocate(grpMemMngr); } + if (isOutGrp) edge->getParent()->forceUpdateShape = true; // force recheck shape updates for nodes in the output groups. } } } @@ -1335,12 +1348,12 @@ inline void Graph::ExecuteNode(const NodePtr& node, const dnnl::stream& stream) OV_ITT_SCOPED_TASK(itt::domains::intel_cpu, node->profiling.execute); + DEBUG_LOG(*node, " exec_graph ", this); if (node->isDynamicNode()) { node->executeDynamic(stream); } else { node->execute(stream); } - DEBUG_LOG(*node); } void Graph::Infer(InferRequestBase* request) { diff --git a/src/plugins/intel_cpu/src/graph.h b/src/plugins/intel_cpu/src/graph.h index 75f56c1688bf54..bd1edcc9e300fe 100644 --- a/src/plugins/intel_cpu/src/graph.h +++ b/src/plugins/intel_cpu/src/graph.h @@ -257,6 +257,8 @@ class Graph { std::map inputNodesMap; std::map outputNodesMap; + std::map outputNodesMemMngrMap; + // these node pointers (from graphNodes) are to avoid regular checking for // constantness of nodes in Infer methods and calls of // non-executable (optimized out) nodes, such as Input, Reshape, etc. 
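Before the infer_request.cpp side of this refactor, the shape of the pattern being introduced is worth spelling out: OutputMemoryMngr becomes a proxy over the group's base memory manager, delegating every call until an infer request installs an external allocator, after which growth requests are served from user-side memory. The sketch below is a condensed illustration only; IMemMngr, IExtAllocator and OutputMemProxy are simplified stand-ins for the plugin's IMemoryMngrObserver/OutputMemoryMngr types, not the real classes.

#include <cstddef>
#include <memory>

struct IMemMngr {
    virtual ~IMemMngr() = default;
    virtual void* getRawPtr() const = 0;
    virtual bool resize(std::size_t size) = 0;
};

struct IExtAllocator {
    virtual ~IExtAllocator() = default;
    virtual void* alloc(std::size_t size) = 0;
};

class OutputMemProxy : public IMemMngr {
public:
    explicit OutputMemProxy(std::shared_ptr<IMemMngr> base) : m_base(std::move(base)) {}

    // Installed by the infer request when the output can be written in place;
    // passing nullptr falls back to the graph-owned manager.
    void setAllocator(std::shared_ptr<IExtAllocator> a) { m_alloc = std::move(a); }

    void* getRawPtr() const override { return m_alloc ? m_data : m_base->getRawPtr(); }

    bool resize(std::size_t size) override {
        if (!m_alloc)
            return m_base->resize(size);        // default path: graph-owned memory
        if (size <= m_upperBound)
            return false;                       // current user buffer is already big enough
        m_data = m_alloc->alloc(size);          // grow through the user-side allocator
        m_upperBound = size;
        return true;                            // pointer changed; dependent Memory objects must refresh
    }

private:
    std::shared_ptr<IMemMngr> m_base;
    std::shared_ptr<IExtAllocator> m_alloc;
    void* m_data = nullptr;
    std::size_t m_upperBound = 0;
};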
diff --git a/src/plugins/intel_cpu/src/infer_request.cpp b/src/plugins/intel_cpu/src/infer_request.cpp index d9faae4887bc7a..5fcdb120386128 100644 --- a/src/plugins/intel_cpu/src/infer_request.cpp +++ b/src/plugins/intel_cpu/src/infer_request.cpp @@ -27,6 +27,12 @@ #include #include "partitioned_mem_mgr.h" +#include "ie_allocator.hpp" // IE public header +// #include "openvino/core/except.hpp" +// #include "openvino/runtime/allocator.hpp" +// #include "openvino/runtime/common.hpp" + + namespace ov { namespace intel_cpu { @@ -265,7 +271,6 @@ void InferRequestBase::changeDefaultPtr() { IE_ASSERT(outputMemMngrs[it.first]); IE_ASSERT(outputAllocators[it.first]); if (canBeInPlace) { - changeEdgePtr(parentEdge, it.second); outputMemMngrs[it.first]->setAllocator(outputAllocators[it.first]); } else { outputMemMngrs[it.first]->setAllocator(nullptr); @@ -632,7 +637,7 @@ InferRequest::InferRequest(const std::vector>& i modelInputsMap[ov::op::util::get_ie_output_name(ngraph::Output(in))] = in; } for (const std::shared_ptr& out : outputs) { - modelOutputsMap[ov::op::util::get_ie_output_name(out->input_value(0))] = out; + modelOutputsMap[ov::op::util::get_ie_output_name(out->input_value(0))] = out; } CreateInferRequest(); @@ -643,23 +648,17 @@ void InferRequest::initBlobs() { InferRequest::GetBlob(it.first); } for (const auto& it : modelOutputsMap) { - auto outblob = InferRequest::GetBlob(it.first); - - outputAllocators[it.first] = std::make_shared(outblob); - - const auto parent_mem = graph->getOutputNodeByName(it.first)->getParentEdgesAtPort(0)[0]->getMemoryPtr(); - const auto memMngr = parent_mem->getMemoryMngr(); - IE_ASSERT(memMngr); - - OutputMemoryMngrPtr outMemMngr; - outMemMngr = std::dynamic_pointer_cast(memMngr); - if (!outMemMngr) { - auto partiMemMngr = std::dynamic_pointer_cast(memMngr); - if (partiMemMngr) { - outMemMngr = std::dynamic_pointer_cast(partiMemMngr->getBaseMemMngr()); - } + outputAllocators[it.first] = InferenceEngine::CreateDefaultAllocator(); + InferRequest::GetBlob(it.first); + + const auto &outMemMngrMap = graph->outputNodesMemMngrMap; + auto itr = outMemMngrMap.find(it.first); + if (itr != outMemMngrMap.end()) { + OutputMemoryMngrPtr outMemMngr; + outMemMngr = std::dynamic_pointer_cast(itr->second); + IE_ASSERT(outMemMngr); + outputMemMngrs[it.first] = outMemMngr; } - outputMemMngrs[it.first] = outMemMngr; } } @@ -799,7 +798,6 @@ InferenceEngine::Blob::Ptr InferRequest::GetBlob(const std::string& name) { InferenceEngine::TensorDesc desc(InferenceEngine::details::convertPrecision(inputNode->second->get_output_element_type(0)), dims, InferenceEngine::TensorDesc::getLayoutByRank(dims.size())); - _inputs[name] = make_blob_with_precision(desc); _inputs[name]->allocate(); @@ -835,7 +833,7 @@ InferenceEngine::Blob::Ptr InferRequest::GetBlob(const std::string& name) { InferenceEngine::TensorDesc desc(InferenceEngine::details::convertPrecision(outputNode->second->get_input_element_type(0)), dims, InferenceEngine::TensorDesc::getLayoutByRank(dims.size())); - data = make_blob_with_precision(desc); + data = make_blob_with_precision(desc, outputAllocators[name]); data->allocate(); } else { const auto& blobDims = data->getTensorDesc().getDims(); diff --git a/src/plugins/intel_cpu/src/infer_request.h b/src/plugins/intel_cpu/src/infer_request.h index 3c93d743913829..cddc56ecf4c4e9 100644 --- a/src/plugins/intel_cpu/src/infer_request.h +++ b/src/plugins/intel_cpu/src/infer_request.h @@ -72,7 +72,7 @@ class InferRequestBase : public InferenceEngine::IInferRequestInternal { protected: 
virtual void changeDefaultPtr(); - std::unordered_map outputAllocators; + std::unordered_map> outputAllocators; std::unordered_map outputMemMngrs; // reference to the memmanager }; diff --git a/src/plugins/intel_cpu/src/node.cpp b/src/plugins/intel_cpu/src/node.cpp index 8afc47986eaeb5..57cf9d436e1772 100644 --- a/src/plugins/intel_cpu/src/node.cpp +++ b/src/plugins/intel_cpu/src/node.cpp @@ -565,7 +565,7 @@ std::vector Node::getAvailableFormatsForDims(const Shape &di void Node::updateShapes() { IE_ASSERT(isDynamicNode()) << "Node::updateShapes() is called to a static shape node of type: " << getTypeStr() << " with name: " << getName(); - if (needShapeInfer()) { + if (needShapeInfer() || forceUpdateShape) { auto result = shapeInfer(); if (ShapeInferStatus::success == result.status) { redefineOutputMemory(result.dims); @@ -618,7 +618,7 @@ void Node::redefineOutputMemory(const std::vector &newOutputShapes) } const auto &currDesc = edges[0]->getMemory().getDesc(); - if (currDesc.getShape().isStatic() && currDesc.getShape().getStaticDims() == newOutputShape) + if (currDesc.getShape().isStatic() && currDesc.getShape().getStaticDims() == newOutputShape && !forceUpdateShape) continue; const bool hasZeroDims = std::count(std::begin(newOutputShape), std::end(newOutputShape), 0) > 0; diff --git a/src/plugins/intel_cpu/src/node.h b/src/plugins/intel_cpu/src/node.h index 8a9c3c9b74f3f1..27ac3c44257d26 100644 --- a/src/plugins/intel_cpu/src/node.h +++ b/src/plugins/intel_cpu/src/node.h @@ -730,6 +730,8 @@ class Node { #ifdef CPU_DEBUG_CAPS friend class Verbose; #endif +public: + bool forceUpdateShape = false; }; template diff --git a/src/plugins/intel_cpu/src/output_mem_mgr.cpp b/src/plugins/intel_cpu/src/output_mem_mgr.cpp index c7cdf288846797..3bd85dfc79baa4 100644 --- a/src/plugins/intel_cpu/src/output_mem_mgr.cpp +++ b/src/plugins/intel_cpu/src/output_mem_mgr.cpp @@ -10,31 +10,27 @@ void* OutputMemoryMngr::getRawPtr() const noexcept { if (m_allocator) { return _data; } else { - return _pMemMngr->getRawPtr(); + return m_pMngr->getRawPtr(); } } void OutputMemoryMngr::setExtBuff(void* ptr, size_t size) { - if (m_allocator) { - return; - } else { - return _pMemMngr->setExtBuff(ptr, size); - } + IE_ASSERT(!m_allocator); // FIXME: shouldn't set extbuff when there is an allocator? + return m_pMngr->setExtBuff(ptr, size); } bool OutputMemoryMngr::resize(size_t size) { if (m_allocator) { - constexpr int cacheLineSize = 64; + // constexpr int cacheLineSize = 64; bool sizeChanged = false; if (size > _memUpperBound) { - m_allocator->setMemDesc(m_memDesc); - _data = m_allocator->allocate(size, cacheLineSize); + _data = m_allocator->alloc(size); _memUpperBound = size; sizeChanged = true; } return sizeChanged; } else { - return _pMemMngr->resize(size); + return m_pMngr->resize(size); } } @@ -42,30 +38,10 @@ bool OutputMemoryMngr::hasExtBuffer() const noexcept { return true; } -void OutputMemoryMngr::setMemDesc(MemoryDescPtr desc) { - m_memDesc = desc; - return; -} - -void* OutputAllocator::allocate(const size_t bytes, const size_t alignment) { - (void)alignment; - const auto actualDesc = MemoryDescUtils::convertToTensorDesc(*m_memDesc.get()); - IE_ASSERT(m_memDesc->getCurrentMemSize()==bytes); - - auto ¤tDesc = m_blob->getTensorDesc(); - const auto outDims = actualDesc.getDims(); - if (currentDesc.getDims() != outDims) { - // WA: because input/output info initially contains non empty dims, order etc. 
- // and setDims (called inside setShape) can't correct modify blocked desc for desc with blocked layout - if (currentDesc.getLayout() == InferenceEngine::Layout::BLOCKED) { - currentDesc = InferenceEngine::TensorDesc(currentDesc.getPrecision(), currentDesc.getLayout()); - } - m_blob->setShape(outDims); - } - return m_blob->buffer(); +void OutputMemoryMngr::registerMemory(Memory* memPtr) { + m_pMngr->registerMemory(memPtr); } -void OutputAllocator::setMemDesc(MemoryDescPtr desc) { - m_memDesc = desc; - return; +void OutputMemoryMngr::unregisterMemory(Memory* memPtr) { + m_pMngr->unregisterMemory(memPtr); } \ No newline at end of file diff --git a/src/plugins/intel_cpu/src/output_mem_mgr.h b/src/plugins/intel_cpu/src/output_mem_mgr.h index a7786b11565502..9a405ce67fc577 100644 --- a/src/plugins/intel_cpu/src/output_mem_mgr.h +++ b/src/plugins/intel_cpu/src/output_mem_mgr.h @@ -7,31 +7,16 @@ #include "cpu_memory.h" #include "openvino/runtime/allocator.hpp" #include "memory_desc/cpu_memory_desc.h" +#include "ie_allocator.hpp" // IE public header +// #include "blob_allocator.hpp" namespace ov { namespace intel_cpu { -class OutputAllocator : public Allocator { +class OutputMemoryMngr : public IMemoryMngrObserver { public: - OutputAllocator(InferenceEngine::Blob::Ptr blob) : m_blob{blob} {} - - void* allocate(const size_t bytes, const size_t alignment = alignof(max_align_t)); - void setMemDesc(MemoryDescPtr desc); -private: - InferenceEngine::Blob::Ptr m_blob; - MemoryDescPtr m_memDesc; -}; -using OutputAllocatorPtr = std::shared_ptr; -using OutputAllocatorCPtr = std::shared_ptr; - - -class OutputMemoryMngr : public IMemoryMngr { -public: - explicit OutputMemoryMngr(std::unique_ptr mngr) : _pMemMngr(std::move(mngr)) {} - // OutputMemoryMngr(OutputAllocatorPtr allocator) : m_allocator{allocator} {} - - ~OutputMemoryMngr() { - // m_allocator.deallocate(m_ptr, get_byte_size()); + explicit OutputMemoryMngr(MemoryMngrPtr pMngr) : m_pMngr(pMngr) { + IE_ASSERT(m_pMngr) << "Memory manager is uninitialized"; } void* getRawPtr() const noexcept override; @@ -39,17 +24,19 @@ class OutputMemoryMngr : public IMemoryMngr { bool resize(size_t size) override; bool hasExtBuffer() const noexcept override; - void setAllocator(OutputAllocatorPtr allocator) { m_allocator = allocator; } - void setMemDesc(MemoryDescPtr desc); + void registerMemory(Memory* memPtr) override; + void unregisterMemory(Memory* memPtr) override; + + void setAllocator(std::shared_ptr allocator) { m_allocator = allocator; } private: - OutputAllocatorPtr m_allocator; - MemoryDescPtr m_memDesc; + std::shared_ptr m_allocator; + size_t _memUpperBound = 0ul; void* _data; // We need the default MemMngr as may fallback to copy output... and // we have no idea of this in early stages of graph memory allocation. 
- std::unique_ptr _pMemMngr; + MemoryMngrPtr m_pMngr; }; using OutputMemoryMngrPtr = std::shared_ptr; using OutputMemoryMngrCPtr = std::shared_ptr; diff --git a/src/plugins/intel_cpu/src/partitioned_mem_mgr.cpp b/src/plugins/intel_cpu/src/partitioned_mem_mgr.cpp index b19b1e6d77f923..a712cbcd2749bd 100644 --- a/src/plugins/intel_cpu/src/partitioned_mem_mgr.cpp +++ b/src/plugins/intel_cpu/src/partitioned_mem_mgr.cpp @@ -24,23 +24,10 @@ bool PartitionedMemoryMngr::hasExtBuffer() const noexcept { } void PartitionedMemoryMngr::registerMemory(Memory* memPtr) { - auto pMgrObs = std::dynamic_pointer_cast(m_pMngr); - if (pMgrObs) - pMgrObs->registerMemory(memPtr); + m_pMngr->registerMemory(memPtr); } void PartitionedMemoryMngr::unregisterMemory(Memory* memPtr) { - auto pMgrObs = std::dynamic_pointer_cast(m_pMngr); - if (pMgrObs) - pMgrObs->unregisterMemory(memPtr); -} - -MemoryMngrPtr PartitionedMemoryMngr::getBaseMemMngr() const noexcept { - const auto pMngr = std::dynamic_pointer_cast(m_pMngr); - std::cout << "iteratively getBaseMemMngr()" << this << std::endl; - if (pMngr) - return pMngr->getBaseMemMngr(); - else - return m_pMngr; + m_pMngr->unregisterMemory(memPtr); } diff --git a/src/plugins/intel_cpu/src/partitioned_mem_mgr.h b/src/plugins/intel_cpu/src/partitioned_mem_mgr.h index 971f29360525fe..94f5f9288d27f8 100644 --- a/src/plugins/intel_cpu/src/partitioned_mem_mgr.h +++ b/src/plugins/intel_cpu/src/partitioned_mem_mgr.h @@ -13,7 +13,6 @@ class PartitionedMemoryMngr : public IMemoryMngrObserver { public: PartitionedMemoryMngr(MemoryMngrPtr pMngr, size_t total_blocks = 1, ptrdiff_t offset_blocks = 0, size_t size_blocks = 1) : m_pMngr(pMngr), m_total_blocks(total_blocks), m_offset_blocks(offset_blocks), m_size_blocks(size_blocks) { - std::cout << "baseMemMngr " << m_pMngr << std::endl; IE_ASSERT(m_pMngr) << "Memory manager is uninitialized"; } @@ -24,8 +23,6 @@ class PartitionedMemoryMngr : public IMemoryMngrObserver { void registerMemory(Memory* memPtr) override; void unregisterMemory(Memory* memPtr) override; - MemoryMngrPtr getBaseMemMngr() const noexcept; - private: MemoryMngrPtr m_pMngr; size_t m_total_blocks = 1; // size of the parent memory in blocks From 4af3b26c50b486b019a44215096b3b37126c12f8 Mon Sep 17 00:00:00 2001 From: ceciliapeng Date: Mon, 12 Jun 2023 22:32:46 -0700 Subject: [PATCH 25/28] refactor --- src/plugins/intel_cpu/src/graph.h | 2 ++ src/plugins/intel_cpu/src/infer_request.cpp | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/plugins/intel_cpu/src/graph.h b/src/plugins/intel_cpu/src/graph.h index bd1edcc9e300fe..710b98550e8042 100644 --- a/src/plugins/intel_cpu/src/graph.h +++ b/src/plugins/intel_cpu/src/graph.h @@ -201,6 +201,8 @@ class Graph { dynBatch = newDynBatch; } + Status getDynStatus() const {return status;} + protected: void VisitNode(NodePtr node, std::vector& sortedNodes); diff --git a/src/plugins/intel_cpu/src/infer_request.cpp b/src/plugins/intel_cpu/src/infer_request.cpp index 5fcdb120386128..9043a450e95add 100644 --- a/src/plugins/intel_cpu/src/infer_request.cpp +++ b/src/plugins/intel_cpu/src/infer_request.cpp @@ -265,7 +265,7 @@ void InferRequestBase::changeDefaultPtr() { if (output != outputNodesMap.end()) { auto parentEdge = output->second->getParentEdgeAt(0); - if (graph->hasDynamicInput()) { // TODO: internal dynamism + if (Graph::Status::ReadyDynamic == graph->getDynStatus()) { bool canBeInPlace = true; // TODO: filter IE_ASSERT(outputMemMngrs[it.first]); From 8c8bffafc08b1e6bf060e0b455788f79447fe39c Mon Sep 17 00:00:00 
2001 From: ceciliapeng Date: Mon, 12 Jun 2023 23:47:10 -0700 Subject: [PATCH 26/28] fix --- src/plugins/intel_cpu/src/graph.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/plugins/intel_cpu/src/graph.cpp b/src/plugins/intel_cpu/src/graph.cpp index b0defc42c7f400..79a98e06685761 100644 --- a/src/plugins/intel_cpu/src/graph.cpp +++ b/src/plugins/intel_cpu/src/graph.cpp @@ -912,8 +912,11 @@ void Graph::AllocateWithReuse() { // Store the output memory managers. // So that, the infer requests can be able to get access to them. for (auto& edge : edge_clusters[outBoxId]) { - if (edge->getChild()->getType() == Type::Output) { - outputNodesMemMngrMap[edge->getParent()->getName()] = grpMemMngr; + const auto child = edge->getChild(); + if (child->getType() == Type::Output) { + for (auto &output : outputNodesMap) { + if (output.second == child) outputNodesMemMngrMap[output.first] = grpMemMngr; + } } } } From f02c26ee698a6fdceee1012ed140529a7b1efe14 Mon Sep 17 00:00:00 2001 From: ceciliapeng Date: Tue, 13 Jun 2023 00:45:07 -0700 Subject: [PATCH 27/28] fix --- src/plugins/intel_cpu/src/graph.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/plugins/intel_cpu/src/graph.cpp b/src/plugins/intel_cpu/src/graph.cpp index 79a98e06685761..d7efa45545eab6 100644 --- a/src/plugins/intel_cpu/src/graph.cpp +++ b/src/plugins/intel_cpu/src/graph.cpp @@ -925,7 +925,8 @@ void Graph::AllocateWithReuse() { if (edge->getStatus() == Edge::Status::NeedAllocation) { edge->allocate(grpMemMngr); } - if (isOutGrp) edge->getParent()->forceUpdateShape = true; // force recheck shape updates for nodes in the output groups. + if (isOutGrp && + "Parameter" != edge->getParent()->getTypeStr()) edge->getParent()->forceUpdateShape = true; // force recheck shape updates for nodes in the output groups. } } } @@ -1076,7 +1077,8 @@ void Graph::PullOutputData(BlobMap &out) { void *ext_blob_ptr = ext_blob->buffer(); void *intr_blob_ptr = intr_blob.GetData(); - // That is the same memory. No need to copy + DEBUG_LOG(name, " @ ", intr_blob_ptr, " -> ", ext_blob_ptr, " zero-copy: ", intr_blob_ptr==ext_blob_ptr, " for ", GetName()); + // That is the same memory. No need to copys if (ext_blob_ptr == intr_blob_ptr) continue; if (actualDesc.getBlockingDesc() != expectedDesc.getBlockingDesc() && !isScalarOutput) { From 1696a1d4c2e40603007e1b79fbef5ee055c7b623 Mon Sep 17 00:00:00 2001 From: ceciliapeng Date: Wed, 14 Jun 2023 18:18:24 -0700 Subject: [PATCH 28/28] fix with a custom IAllocator. --- src/plugins/intel_cpu/src/graph.cpp | 7 +- src/plugins/intel_cpu/src/infer_request.cpp | 28 +++---- src/plugins/intel_cpu/src/infer_request.h | 1 - src/plugins/intel_cpu/src/node.cpp | 5 ++ src/plugins/intel_cpu/src/output_mem_mgr.cpp | 80 ++++++++++++++++++-- src/plugins/intel_cpu/src/output_mem_mgr.h | 42 ++++++++-- 6 files changed, 138 insertions(+), 25 deletions(-) diff --git a/src/plugins/intel_cpu/src/graph.cpp b/src/plugins/intel_cpu/src/graph.cpp index d7efa45545eab6..3c1647e7416eff 100644 --- a/src/plugins/intel_cpu/src/graph.cpp +++ b/src/plugins/intel_cpu/src/graph.cpp @@ -907,7 +907,7 @@ void Graph::AllocateWithReuse() { IE_ASSERT(isOutGrp==1); // reuse_io_tensors false grpMemMngr = std::make_shared(grpMemMngr); - DEBUG_LOG(grpMemMngr); + DEBUG_LOG(grpMemMngr, " ", this); // Store the output memory managers. // So that, the infer requests can be able to get access to them. 
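The PullOutputData change in the next hunk builds on the pointer-equality short-circuit already present above ("That is the same memory. No need to copy"): once the producing edge has been redirected into the user's blob, the external and internal pointers coincide and the copy is skipped. A minimal sketch of that check, using a hypothetical helper rather than the plugin function:

#include <cstddef>
#include <cstring>

// ext: user-visible output buffer; intr: the graph's internal output memory.
void pull_output(void* ext, const void* intr, std::size_t bytes) {
    if (ext == intr)
        return;                    // already written in place, nothing to do
    std::memcpy(ext, intr, bytes); // fallback: in-place redirection was not possible
}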
@@ -1039,6 +1039,11 @@ void Graph::PullOutputData(BlobMap &out) { const auto actualDesc = MemoryDescUtils::convertToTensorDesc(intr_blob.getDesc()); auto &expectedDesc = ext_blob->getTensorDesc(); + // FIXME: suppose outputs of dynamic graph are sharing memory. + if (Graph::Status::ReadyDynamic == getDynStatus()) { + return; + } + // TODO [NM]: need to create universal reorder which will be detect cases when we really need to use it // WA: for cases when output shape after transformation will be 1x1x1x1 but model output is scalar bool isScalarOutput = false; diff --git a/src/plugins/intel_cpu/src/infer_request.cpp b/src/plugins/intel_cpu/src/infer_request.cpp index 9043a450e95add..8977db0bdbe6ef 100644 --- a/src/plugins/intel_cpu/src/infer_request.cpp +++ b/src/plugins/intel_cpu/src/infer_request.cpp @@ -268,12 +268,22 @@ void InferRequestBase::changeDefaultPtr() { if (Graph::Status::ReadyDynamic == graph->getDynStatus()) { bool canBeInPlace = true; // TODO: filter - IE_ASSERT(outputMemMngrs[it.first]); + + OutputMemoryMngrPtr outputMemMngr; + const auto &outMemMngrMap = graph->outputNodesMemMngrMap; + auto itr = outMemMngrMap.find(it.first); + if (itr != outMemMngrMap.end()) { + outputMemMngr = std::dynamic_pointer_cast(itr->second); + IE_ASSERT(outputMemMngr); + } + IE_ASSERT(outputAllocators[it.first]); if (canBeInPlace) { - outputMemMngrs[it.first]->setAllocator(outputAllocators[it.first]); + outputMemMngr->setAllocator(outputAllocators[it.first]); + DEBUG_LOG(this, " ", outputMemMngr, " ", outputAllocators[it.first], " ", graph); } else { - outputMemMngrs[it.first]->setAllocator(nullptr); + outputMemMngr->setAllocator(nullptr); + changeEdgePtr(parentEdge, it.second); } continue; @@ -648,17 +658,8 @@ void InferRequest::initBlobs() { InferRequest::GetBlob(it.first); } for (const auto& it : modelOutputsMap) { - outputAllocators[it.first] = InferenceEngine::CreateDefaultAllocator(); + outputAllocators[it.first] = InferenceEngine::CreateOutputAllocator(); InferRequest::GetBlob(it.first); - - const auto &outMemMngrMap = graph->outputNodesMemMngrMap; - auto itr = outMemMngrMap.find(it.first); - if (itr != outMemMngrMap.end()) { - OutputMemoryMngrPtr outMemMngr; - outMemMngr = std::dynamic_pointer_cast(itr->second); - IE_ASSERT(outMemMngr); - outputMemMngrs[it.first] = outMemMngr; - } } } @@ -835,6 +836,7 @@ InferenceEngine::Blob::Ptr InferRequest::GetBlob(const std::string& name) { data = make_blob_with_precision(desc, outputAllocators[name]); data->allocate(); + DEBUG_LOG(static_cast(data->buffer()), "_", data->byteSize()); } else { const auto& blobDims = data->getTensorDesc().getDims(); // in static shape case is enough information that shapes are incompatible to throw exception diff --git a/src/plugins/intel_cpu/src/infer_request.h b/src/plugins/intel_cpu/src/infer_request.h index cddc56ecf4c4e9..5d86c0268479e8 100644 --- a/src/plugins/intel_cpu/src/infer_request.h +++ b/src/plugins/intel_cpu/src/infer_request.h @@ -73,7 +73,6 @@ class InferRequestBase : public InferenceEngine::IInferRequestInternal { virtual void changeDefaultPtr(); std::unordered_map> outputAllocators; - std::unordered_map outputMemMngrs; // reference to the memmanager }; class LegacyInferRequest : public InferRequestBase { diff --git a/src/plugins/intel_cpu/src/node.cpp b/src/plugins/intel_cpu/src/node.cpp index 57cf9d436e1772..9afd7bd09a15f2 100644 --- a/src/plugins/intel_cpu/src/node.cpp +++ b/src/plugins/intel_cpu/src/node.cpp @@ -565,6 +565,7 @@ std::vector Node::getAvailableFormatsForDims(const Shape &di void 
Node::updateShapes() { IE_ASSERT(isDynamicNode()) << "Node::updateShapes() is called to a static shape node of type: " << getTypeStr() << " with name: " << getName(); + DEBUG_LOG(getName(), ", ", forceUpdateShape); if (needShapeInfer() || forceUpdateShape) { auto result = shapeInfer(); if (ShapeInferStatus::success == result.status) { @@ -605,6 +606,7 @@ bool Node::outputShapeDataDependency() const { } void Node::redefineOutputMemory(const std::vector &newOutputShapes) { + DEBUG_LOG(getName()); if (newOutputShapes.size() != outputShapes.size()) { IE_THROW() << "Number shapes mismatch with real outputs number for node with name: " << getName(); } @@ -624,7 +626,10 @@ void Node::redefineOutputMemory(const std::vector &newOutputShapes) const bool hasZeroDims = std::count(std::begin(newOutputShape), std::end(newOutputShape), 0) > 0; const auto memDesc = getBaseMemDescAtOutputPort(i)->cloneWithNewDims(newOutputShape, hasZeroDims); for (size_t j = 0; j < edges.size(); j++) { + auto old_mem_ptr = edges[j]->getMemoryPtr()->GetData(); edges[j]->getMemoryPtr()->redefineDesc(memDesc); + auto new_mem_ptr = edges[j]->getMemoryPtr()->GetData(); + DEBUG_LOG(getName(), " output ", i, " edge ", j, " ", old_mem_ptr, " -> ", new_mem_ptr); } } } diff --git a/src/plugins/intel_cpu/src/output_mem_mgr.cpp b/src/plugins/intel_cpu/src/output_mem_mgr.cpp index 3bd85dfc79baa4..fed5292a27ebb0 100644 --- a/src/plugins/intel_cpu/src/output_mem_mgr.cpp +++ b/src/plugins/intel_cpu/src/output_mem_mgr.cpp @@ -3,28 +3,95 @@ // #include "output_mem_mgr.h" +#include "utils/debug_capabilities.h" using namespace ov::intel_cpu; +namespace InferenceEngine { +IE_SUPPRESS_DEPRECATED_START +std::shared_ptr CreateOutputAllocator() noexcept { + try { + auto data = std::make_shared(); + DEBUG_LOG(data); + return data; + } catch (...) { + return nullptr; + } +} + +void OutputAllocator::release(void *ptr) { DEBUG_LOG(ptr); } +void OutputAllocator::destroy(void *ptr) { + DEBUG_LOG(ptr); + if (!ptr) return; + + // dnnl::impl::free(ptr); + delete[] reinterpret_cast(ptr); + ptr = nullptr; +} + +void* OutputAllocator::alloc(size_t size) noexcept { + try { + // constexpr int cacheLineSize = 64; + // void *ptr = dnnl::impl::malloc(size, cacheLineSize); + // if (!ptr) { + // IE_THROW() << "Failed to allocate " << size << " bytes of memory"; + // } + // if (size) { + auto ptr = reinterpret_cast(new char[size]); + _data = decltype(_data)(ptr, destroy); + // } else { + // _data = decltype(_data)(nullptr, release); + // } + + DEBUG_LOG(_data.get(), "_", size); + + return _data.get(); + } catch (...) { + return nullptr; + } +} +} // namespace InferenceEngine + +void OutputMemoryMngr::setAllocator(std::shared_ptr allocator) { + DEBUG_LOG(allocator, " ", allocator ? std::dynamic_pointer_cast(allocator)->getRawPtr() : "null", " this = ", this); + + if (allocator) { + auto _allocator = std::dynamic_pointer_cast(allocator); + IE_ASSERT(_allocator); + m_allocator = _allocator; + } else { + m_allocator = nullptr; + } +} + void* OutputMemoryMngr::getRawPtr() const noexcept { + void *ptr; if (m_allocator) { - return _data; + ptr = m_allocator->getRawPtr(); } else { - return m_pMngr->getRawPtr(); + ptr = m_pMngr->getRawPtr(); } + + DEBUG_LOG(m_allocator, " ", ptr, " this = ", this); + return ptr; } void OutputMemoryMngr::setExtBuff(void* ptr, size_t size) { - IE_ASSERT(!m_allocator); // FIXME: shouldn't set extbuff when there is an allocator? 
+ DEBUG_LOG(ptr, "_", size, " this = ", this); + if (m_allocator) { + IE_THROW() << "Should not call setExtBuff when it is an OutputMemoryMngr with the allocator!"; + return; + } return m_pMngr->setExtBuff(ptr, size); } bool OutputMemoryMngr::resize(size_t size) { + DEBUG_LOG(m_allocator, " ", size, " ", _memUpperBound, " this = ", this); if (m_allocator) { - // constexpr int cacheLineSize = 64; bool sizeChanged = false; if (size > _memUpperBound) { - _data = m_allocator->alloc(size); + auto ptr = m_allocator->alloc(size); + DEBUG_LOG(ptr, "_", size); _memUpperBound = size; sizeChanged = true; } @@ -35,13 +102,16 @@ bool OutputMemoryMngr::resize(size_t size) { } bool OutputMemoryMngr::hasExtBuffer() const noexcept { + DEBUG_LOG("", " this = ", this); return true; } void OutputMemoryMngr::registerMemory(Memory* memPtr) { + DEBUG_LOG(memPtr->GetData(), " this = ", this); m_pMngr->registerMemory(memPtr); } void OutputMemoryMngr::unregisterMemory(Memory* memPtr) { + DEBUG_LOG(memPtr->GetData(), " this = ", this); m_pMngr->unregisterMemory(memPtr); } \ No newline at end of file diff --git a/src/plugins/intel_cpu/src/output_mem_mgr.h b/src/plugins/intel_cpu/src/output_mem_mgr.h index 9a405ce67fc577..44d4617821c6c5 100644 --- a/src/plugins/intel_cpu/src/output_mem_mgr.h +++ b/src/plugins/intel_cpu/src/output_mem_mgr.h @@ -8,7 +8,40 @@ #include "openvino/runtime/allocator.hpp" #include "memory_desc/cpu_memory_desc.h" #include "ie_allocator.hpp" // IE public header -// #include "blob_allocator.hpp" + +IE_SUPPRESS_DEPRECATED_START +namespace InferenceEngine { +class OutputAllocator : public InferenceEngine::IAllocator { +public: + OutputAllocator() : _data(nullptr, release) {} + + void* lock(void* handle, InferenceEngine::LockOp = InferenceEngine::LOCK_FOR_WRITE) noexcept override { + return handle; + } + + void unlock(void* handle) noexcept override {} + + void* alloc(size_t size) noexcept override; + + bool free(void* handle) noexcept override { + // do nothing, as _data will be deleted along with the instance. + return true; + } + + void* getRawPtr() { + return _data.get(); + } + +private: + std::unique_ptr _data; + + static void release(void *ptr); + static void destroy(void *ptr); +}; + +std::shared_ptr CreateOutputAllocator() noexcept; +} // namespace InferenceEngine +IE_SUPPRESS_DEPRECATED_END namespace ov { namespace intel_cpu { @@ -27,13 +60,12 @@ class OutputMemoryMngr : public IMemoryMngrObserver { void registerMemory(Memory* memPtr) override; void unregisterMemory(Memory* memPtr) override; - void setAllocator(std::shared_ptr allocator) { m_allocator = allocator; } + void setAllocator(std::shared_ptr allocator); private: - std::shared_ptr m_allocator; - + std::shared_ptr m_allocator; size_t _memUpperBound = 0ul; - void* _data; + // We need the default MemMngr as may fallback to copy output... and // we have no idea of this in early stages of graph memory allocation. MemoryMngrPtr m_pMngr;
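The custom IAllocator shown above keeps ownership of the output storage inside the allocator itself: alloc() stores the buffer in a smart pointer and hands out the raw address, while free() is a no-op because the memory must stay valid for as long as the blob exists. A self-contained sketch of that ownership pattern follows; OwningOutputAllocator is an illustrative stand-in and does not implement the InferenceEngine::IAllocator interface.

#include <cstddef>
#include <memory>

class OwningOutputAllocator {
public:
    void* alloc(std::size_t size) noexcept {
        try {
            m_data.reset(new char[size]); // any previously held buffer is released here
            return m_data.get();
        } catch (...) {
            return nullptr;
        }
    }

    // Ownership never leaves the allocator, so "free" is a no-op; the buffer is
    // released when the allocator (and the blob holding it) is destroyed.
    bool free(void* /*handle*/) noexcept { return true; }

    void* raw() const noexcept { return m_data.get(); }

private:
    std::unique_ptr<char[]> m_data;
};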