diff --git a/src/common/snippets/include/snippets/lowered/loop_info.hpp b/src/common/snippets/include/snippets/lowered/loop_info.hpp index e763f2244d76c6..8878affdc0fb35 100644 --- a/src/common/snippets/include/snippets/lowered/loop_info.hpp +++ b/src/common/snippets/include/snippets/lowered/loop_info.hpp @@ -156,16 +156,6 @@ class LoopInfo { const char* get_type_name() const { return get_type_info().name; } - /** - * @brief Return true if expression port is a loop port - * @param expr_port - expression port to check - */ - bool is_loop_port(const ExpressionPort& expr_port); - /** - * @brief Return loop port of an expression port - * @param expr_port - expression port. - */ - const LoopPort& get_loop_port(const ExpressionPort& expr_port); protected: /** @@ -334,15 +324,6 @@ class UnifiedLoopInfo : public LoopInfo { */ void replace_with_new_ports(const ExpressionPort& actual_port, const std::vector& target_ports) override; - /** - * @brief Remove remove_ports and add add_ports to the current LoopPort. - * This function removes ports directly and adds ports at the end of current LoopPort, caller is responsible to - * sort the LoopPort after LoopPort being updated according to execution order of the expressions. - * Note: all port in remove_ports and add_ports should have the same type. - * @param remove_ports need to be removed - * @param add_ports need to be added - */ - void update_loop_ports(const std::vector& remove_ports, const std::vector& add_ports); /** * @brief Iterates through all LoopPortDesc and call `caller` for each of them * @param caller - function that called for each LoopPortDesc @@ -393,23 +374,6 @@ class UnifiedLoopInfo : public LoopInfo { * - Consistency of ports and descriptors */ void validate() const; - /** - * @brief Remove the current LoopPort that contains ExpressionPort. - * Note: If there is no LoopPort with ExpressionPort `ports`, does nothing. - * This function removes ports directly, caller is responsible to sort the LoopPort after updated - * according to execution order of the expressions. - * Note: all port in ports should have the same type. - * @param ports need to be removed - */ - void remove_loop_ports(const std::vector& ports); - /** - * @brief Add ports to the current LoopPort. - * This function adds ports in end of current LoopPort vector, caller is responsible to - * sort the LoopPort after updated according to execution order of the expressions. - * Note: all port in ports should have the same type. - * @param ports need to be added - */ - void add_loop_ports(const std::vector& ports); SpecificIterationHandlers m_handlers = {}; std::vector m_input_port_descs = {}; diff --git a/src/common/snippets/include/snippets/lowered/loop_manager.hpp b/src/common/snippets/include/snippets/lowered/loop_manager.hpp index b34c0ee2b3f975..f0718107ca30a2 100644 --- a/src/common/snippets/include/snippets/lowered/loop_manager.hpp +++ b/src/common/snippets/include/snippets/lowered/loop_manager.hpp @@ -3,6 +3,7 @@ // #pragma once + #include #include @@ -217,7 +218,7 @@ class LoopManager { * @param loop_end_pos the next iterator after the last expression * @param loop_id target Loop ID */ - void sort_loop_ports(const LinearIR::constExprIt& loop_begin_pos, const LinearIR::constExprIt& loop_end_pos, size_t loop_id); + void sort_loop_ports(LinearIR::constExprIt& loop_begin_pos, LinearIR::constExprIt& loop_end_pos, size_t loop_id); /** * @brief When the previous expression was replaced with new expressions (decomposition), the method updates the corresponding Loop. 
* If ports of decomposed expression were the Loop ports, these Loop ports may be updated by parameters `entries` and `exits` @@ -275,6 +276,7 @@ class LoopManager { */ bool reorder_identifiers(const std::map& loop_id_map); +private: /** * @brief Add new Loop Info to the map * @param loop target loop info @@ -286,8 +288,6 @@ class LoopManager { * @param index the target index of Loop */ void remove_loop_info(size_t index); - -private: /** * @brief Find expression ports in bounds that are connected to consumers or parent that aren't in these bounds * @param loop_begin_pos the first expression iterator of the Loop diff --git a/src/common/snippets/include/snippets/lowered/pass/extract_loop_invariants.hpp b/src/common/snippets/include/snippets/lowered/pass/extract_loop_invariants.hpp deleted file mode 100644 index d4fb19f8d0c6a4..00000000000000 --- a/src/common/snippets/include/snippets/lowered/pass/extract_loop_invariants.hpp +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright (C) 2024 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include "pass.hpp" -#include "snippets/lowered/loop_manager.hpp" - -namespace ov { -namespace snippets { -namespace lowered { -namespace pass { - -/** - * @interface ExtractLoopInvariants - * @brief Extracts expressions that produce identical result on every loop iteration outside of the loop's body. - * This extraction is to remove repeated computation, not cover constant subgraph extraction. - * @ingroup snippets - */ -class ExtractLoopInvariants : public RangedPass { -public: - OPENVINO_RTTI("ExtractLoopInvariants", "RangedPass") - ExtractLoopInvariants() = default; - bool run(LinearIR& linear_ir, lowered::LinearIR::constExprIt begin, lowered::LinearIR::constExprIt end) override; -}; - -} // namespace pass -} // namespace lowered -} // namespace snippets -} // namespace ov diff --git a/src/common/snippets/include/snippets/utils.hpp b/src/common/snippets/include/snippets/utils.hpp index 268bb838df92f7..99fb9a3a4196ff 100644 --- a/src/common/snippets/include/snippets/utils.hpp +++ b/src/common/snippets/include/snippets/utils.hpp @@ -131,10 +131,6 @@ inline size_t get_output_dim_idx(const std::vector& layout, size_t dim_i // dim_idx starts from the layout end size_t get_dim_idx(const lowered::ExpressionPort& port, size_t dim_idx); -// get stride on dimenison of dim_idx -// given shape [a,b,c,d], the stride is [b*c*d, c*d, d, 1] -int64_t get_stride(size_t dim_idx, const VectorDims& shape); - /* ----- Shape `getters` ----- */ /** * @brief Returns a dense shape after applying the order. diff --git a/src/common/snippets/src/lowered/loop_info.cpp b/src/common/snippets/src/lowered/loop_info.cpp index f2e1b9c6a3c2e2..5564a65bd81d91 100644 --- a/src/common/snippets/src/lowered/loop_info.cpp +++ b/src/common/snippets/src/lowered/loop_info.cpp @@ -98,7 +98,7 @@ std::vector::iterator LoopInfo::find_loop_port(const LoopPort& loop_po auto& ports = loop_port.expr_port->get_type() == ExpressionPort::Input ? 
m_input_ports : m_output_ports;
     const auto it = std::find_if(ports.begin(), ports.end(), [&loop_port](const LoopPort& port) { return port == loop_port; });
-    OPENVINO_ASSERT(it != ports.end(), "Failed update_loop_port: existing loop port has not been found");
+    OPENVINO_ASSERT(it != ports.end(), "Failed find_loop_port: existing loop port has not been found");
     return it;
 }
 
@@ -110,17 +110,6 @@ std::vector<LoopPort>::iterator LoopInfo::find_loop_port(const ExpressionPort& e
-bool LoopInfo::is_loop_port(const ExpressionPort& expr_port) {
-    const auto& loop_port_it = find_loop_port(expr_port);
-    const auto& ports = expr_port.get_type() == ExpressionPort::Input ? m_input_ports : m_output_ports;
-    return loop_port_it != ports.end();
-}
-
-const LoopPort& LoopInfo::get_loop_port(const ExpressionPort& expr_port) {
-    OPENVINO_ASSERT(is_loop_port(expr_port), "Failed get_loop_port: expr_port is not a loop port");
-    return *find_loop_port(expr_port);
-}
-
 void LoopInfo::replace_with_new_ports(const LoopPort& actual_port, const std::vector<LoopPort>& target_ports) {
     auto& ports = actual_port.expr_port->get_type() == ExpressionPort::Input ? m_input_ports : m_output_ports;
     auto port_it = find_loop_port(actual_port);
@@ -270,7 +259,7 @@ void order(const std::vector<size_t>& new_order, std::vector<T>& values) {
                     "Failed to sort values: `new_order` must contain new indexes for ALL values");
     std::vector<T> ordered_values(values.size());
     for (size_t i = 0; i < values.size(); ++i) {
-        ordered_values[i] = values[new_order[i]];
+        ordered_values[new_order[i]] = values[i];
     }
     values = std::move(ordered_values);
 }
@@ -325,51 +314,6 @@ void UnifiedLoopInfo::replace_with_new_ports(const ExpressionPort& actual_port,
     validate();
 }
 
-void UnifiedLoopInfo::update_loop_ports(const std::vector<ExpressionPort>& actual_ports, const std::vector<ExpressionPort>& target_ports) {
-    add_loop_ports(target_ports);
-    remove_loop_ports(actual_ports);
-    validate();
-}
-
-void UnifiedLoopInfo::remove_loop_ports(const std::vector<ExpressionPort>& ports) {
-    if (ports.empty())
-        return;
-    bool is_input = ports[0].get_type() == ExpressionPort::Input;
-    auto& loop_ports = is_input ? m_input_ports : m_output_ports;
-    auto& loop_ports_desc = is_input ? m_input_port_descs : m_output_port_descs;
-    for (size_t i = 0; i < ports.size(); i++) {
-        OPENVINO_ASSERT(is_input ? (ports[i].get_type() == ExpressionPort::Input) : (ports[i].get_type() == ExpressionPort::Output),
-                        "ports in remove_loop_ports have different type.");
-        auto port_it = find_loop_port(ports[i]);
-        // if not in loop ports, skip
-        if (port_it == loop_ports.end())
-            continue;
-
-        loop_ports.erase(port_it);
-        auto dist = std::distance(loop_ports.begin(), port_it);
-        loop_ports_desc.erase(loop_ports_desc.begin() + dist);
-    }
-}
-
-void UnifiedLoopInfo::add_loop_ports(const std::vector<ExpressionPort>& ports) {
-    if (ports.empty())
-        return;
-    bool is_input = ports[0].get_type() == ExpressionPort::Input;
-    auto& loop_ports = is_input ? m_input_ports : m_output_ports;
-    auto& loop_ports_desc = is_input ? m_input_port_descs : m_output_port_descs;
-    size_t loop_dim_idx = get_dim_idx();
-    for (size_t i = 0; i < ports.size(); i++) {
-        OPENVINO_ASSERT(is_input ?
(ports[i].get_type() == ExpressionPort::Input) : (ports[i].get_type() == ExpressionPort::Output), - "ports in add_loop_ports have different type."); - // if already in loop ports, skip - auto loop_port = find_loop_port(ports[i]); - if (loop_port != loop_ports.end()) - continue; - loop_ports.push_back(LoopPort(ports[i], true, loop_dim_idx)); - loop_ports_desc.push_back(LoopPortDesc()); - } -} - ExpandedLoopInfo::ExpandedLoopInfo(size_t work_amount, size_t increment, const std::vector& entries, const std::vector& exits, std::vector ptr_increments, std::vector final_offsets, std::vector data_sizes, diff --git a/src/common/snippets/src/lowered/loop_manager.cpp b/src/common/snippets/src/lowered/loop_manager.cpp index 52cf147cff2495..224e1add666948 100644 --- a/src/common/snippets/src/lowered/loop_manager.cpp +++ b/src/common/snippets/src/lowered/loop_manager.cpp @@ -123,8 +123,15 @@ std::pair LoopManager::get_loop_bo } LoopPort LoopManager::get_loop_port_by_expr_port(const ExpressionPort& expr_port, const size_t loop_id) { + auto get_loop_port = [&](const std::vector& ports) { + auto it = std::find_if(ports.cbegin(), ports.cend(), [&](const LoopPort& p) { return *p.expr_port == expr_port; }); + if (it == ports.cend()) + OPENVINO_THROW("Expression has not been found among loop ports. Loop id: " + std::to_string(loop_id)); + return *it; + }; const auto& loop_info = get_loop_info(loop_id); - return loop_info->get_loop_port(expr_port); + return expr_port.get_type() == ExpressionPort::Input ? get_loop_port(loop_info->get_input_ports()) + : get_loop_port(loop_info->get_output_ports()); } void LoopManager::get_io_loop_ports(LinearIR::constExprIt loop_begin_pos, @@ -390,7 +397,7 @@ void LoopManager::expression_replacement(LinearIR::constExprIt new_expr_begin, L } } -void LoopManager::sort_loop_ports(const LinearIR::constExprIt& loop_begin_pos, const LinearIR::constExprIt& loop_end_pos, size_t loop_id) { +void LoopManager::sort_loop_ports(LinearIR::constExprIt& loop_begin_pos, LinearIR::constExprIt& loop_end_pos, size_t loop_id) { // [113536] Update this logic please, when expression numeration will be implemented const auto& loop_info = get_loop_info(loop_id); const auto& loop_entries = loop_info->get_input_ports(); diff --git a/src/common/snippets/src/lowered/pass/define_buffer_clusters.cpp b/src/common/snippets/src/lowered/pass/define_buffer_clusters.cpp index d4e42c8179f6c1..dc65918a96ecd0 100644 --- a/src/common/snippets/src/lowered/pass/define_buffer_clusters.cpp +++ b/src/common/snippets/src/lowered/pass/define_buffer_clusters.cpp @@ -320,6 +320,8 @@ bool DefineBufferClusters::are_buffer_neighbours(const ExpressionPtr& up, const void DefineBufferClusters::parse_memory_access_op(const ExpressionPtr& expr) { const auto ma = std::dynamic_pointer_cast(expr->get_node()); + if (!ma->is_full_memory_access_op(expr->get_node())) + return; // TODO: Some full MemoryAccess ops can have inplace inputs and outputs in general. 
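A note on the `order()` fix in the loop_info.cpp hunk above: the corrected line treats `new_order` as a scatter map, where `new_order[i]` is the destination index of `values[i]`, exactly as the adjacent assertion message says; the old line read it as a gather map, i.e. it applied the inverse permutation. A minimal standalone check of the fixed semantics:

```cpp
#include <cassert>
#include <utility>
#include <vector>

// Scatter semantics: new_order[i] is the NEW index of values[i].
// The old code, ordered_values[i] = values[new_order[i]], gathered instead,
// which applies the inverse permutation.
template <typename T>
void order(const std::vector<size_t>& new_order, std::vector<T>& values) {
    std::vector<T> ordered_values(values.size());
    for (size_t i = 0; i < values.size(); ++i)
        ordered_values[new_order[i]] = values[i];
    values = std::move(ordered_values);
}

int main() {
    std::vector<char> v{'a', 'b', 'c'};
    order({2, 0, 1}, v);  // 'a' goes to index 2, 'b' to 0, 'c' to 1
    assert((v == std::vector<char>{'b', 'c', 'a'}));
}
```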
// Need to add mechanism of inplace ports using MemoryAccess::PortDescriptor::inplace for (const auto& input : expr->get_input_port_connectors()) { diff --git a/src/common/snippets/src/lowered/pass/extract_loop_invariants.cpp b/src/common/snippets/src/lowered/pass/extract_loop_invariants.cpp deleted file mode 100644 index 3f8086ebbce894..00000000000000 --- a/src/common/snippets/src/lowered/pass/extract_loop_invariants.cpp +++ /dev/null @@ -1,193 +0,0 @@ -// Copyright (C) 2024 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "snippets/lowered/pass/extract_loop_invariants.hpp" - -#include "snippets/itt.hpp" -#include "snippets/lowered/linear_ir.hpp" -#include "snippets/snippets_isa.hpp" -#include "snippets/utils.hpp" - -namespace ov { -namespace snippets { -namespace lowered { -namespace pass { -namespace { -void remove_last_loop_id(const std::shared_ptr& expr) { - auto loop_ids = expr->get_loop_ids(); - OPENVINO_ASSERT(!loop_ids.empty(), "Expr loop_ids should not be empty when remove last loop id."); - loop_ids.pop_back(); - expr->set_loop_ids(loop_ids); -} - -int64_t get_stride_after_move_outer(const LoopPort& loop_port) { - const auto& expr_port = loop_port.expr_port; - const auto& shape = expr_port->get_descriptor_ptr()->get_shape(); - size_t shape_dim_idx = utils::get_dim_idx(*expr_port, loop_port.dim_idx); - int64_t stride = utils::get_stride(shape_dim_idx, shape); - if (utils::is_dynamic_value(stride) || utils::is_dynamic_value(shape[shape_dim_idx])) { - return utils::get_dynamic_value(); - } else { - return stride * static_cast(shape[shape_dim_idx]); - } -} - -bool is_extraction_applicable(const ExpressionPtr& expr, const UnifiedLoopInfoPtr& inner_loop_info) { - const auto& expr_input_ports = expr->get_input_ports(); - const auto& input_port_size = expr_input_ports.size(); - if (input_port_size == 0) - return false; - - for (size_t i = 0; i < input_port_size; ++i) { - const auto& parent = expr->get_input_port_connector(i)->get_source().get_expr(); - bool parent_scalar_with_single_consumer = ov::is_type(parent->get_node()) && - parent->get_output_port_connector(0)->get_consumers().size() == 1; - const auto& is_loop_port = inner_loop_info->is_loop_port(expr_input_ports[i]); - // If expr input port is not a loop input port, then should not extract. In this case expr depend on result of another expr in inner loop, - // i.e. move expr to top(outside) of inner loop does not keep data dependency. - // If expr has parent scalar which has single consumer, expr and parent scalar could be extracted together. If parent scalar has multiple - // consumers, the scalar has chance to move with other consumers, which maybe break data dependency as well. - if (!is_loop_port && !parent_scalar_with_single_consumer) { - return false; - } - if (is_loop_port) { - // stride is not 1 after move to outside, then should not extract. 
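For context on the stride check in the removed code above: `utils::get_stride` is deleted from snippets/utils.hpp by this patch and re-added as a file-local helper in init_loops.cpp further down. A self-contained sketch of what it computes, with the dynamic-dimension handling omitted for brevity:

```cpp
#include <cstdint>
#include <vector>

// Dense row-major strides: for shape [a, b, c, d] the strides are
// [b*c*d, c*d, d, 1], so the stride of dimension dim_idx is the product
// of all dimensions to its right.
int64_t get_stride(size_t dim_idx, const std::vector<size_t>& shape) {
    int64_t stride = 1;
    for (size_t i = dim_idx + 1; i < shape.size(); ++i)
        stride *= static_cast<int64_t>(shape[i]);
    return stride;
}
// Example: get_stride(1, {2, 3, 4, 5}) == 20, i.e. one step along
// dimension 1 skips 4 * 5 elements.
```

The removed `get_stride_after_move_outer` then multiplied this stride by the loop dimension's own extent, and extraction was rejected whenever the result was not 1.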
- const auto& loop_port = inner_loop_info->get_loop_port(expr_input_ports[i]); - if (get_stride_after_move_outer(loop_port) != 1) { - return false; - } - } - } - return true; -} - -void extract_expr(const ExpressionPtr& expr, LinearIR& linear_ir, - LinearIR::constExprIt& inner_loop_begin_pos, const LinearIR::constExprIt& inner_loop_end_pos) { - // update expr loop id - remove_last_loop_id(expr); - // move if it is not the first - if (expr != *inner_loop_begin_pos) { - auto port_expr_iter = std::find(inner_loop_begin_pos, inner_loop_end_pos, expr); - OPENVINO_ASSERT(port_expr_iter != inner_loop_end_pos, "Identified extractable expr is not found in loop."); - linear_ir.move(port_expr_iter, inner_loop_begin_pos); - } else { - inner_loop_begin_pos++; - } -} - -void update_loop_ports(const ExpressionPtr& expr, const LoopManagerPtr& loop_manager, size_t inner_loop_id, - const LinearIR::constExprIt& inner_loop_begin_pos, const LinearIR::constExprIt& inner_loop_end_pos) { - const auto& inner_loop_info = loop_manager->get_loop_info(inner_loop_id); - // delete expr input ports from loop input points, add expr output ports' consumers if - // consumed in inner loop to loop input ports. - std::vector new_loop_input_ports; - for (size_t i = 0; i < expr->get_output_count(); i++) { - const auto& consumers = expr->get_output_port_connector(i)->get_consumers(); - for (const auto& consumer : consumers) { - const auto& loop_ids = consumer.get_expr()->get_loop_ids(); - if (std::find(loop_ids.cbegin(), loop_ids.cend(), inner_loop_id) != loop_ids.cend()) { - new_loop_input_ports.push_back(consumer); - } - } - } - const auto& expr_input_ports = expr->get_input_ports(); - inner_loop_info->update_loop_ports(expr_input_ports, new_loop_input_ports); - - // delete expr out ports from loop out ports directly if it's in loop output ports - std::vector out_ports_to_delete; - for (size_t i = 0; i < expr->get_output_count(); ++i) { - const auto& out_port = expr->get_output_port(i); - if (inner_loop_info->is_loop_port(out_port)) { - out_ports_to_delete.push_back(out_port); - } - } - if (!out_ports_to_delete.empty()) { - std::vector new_ports; - inner_loop_info->update_loop_ports(out_ports_to_delete, new_ports); - } - // TODO: 142990. - // Need sort after update loop ports. There are possibility that all exprs are moved to outer loop. 
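With `LoopInfo::is_loop_port()` and `get_loop_port()` gone, membership checks like the ones in the removed pass are phrased directly against the port vectors, which is what the lambda added to `LoopManager::get_loop_port_by_expr_port` in loop_manager.cpp does. A sketch of that test, assuming the repo's `LoopPort`/`ExpressionPort` types from snippets/lowered/loop_info.hpp (the free function itself is hypothetical):

```cpp
#include <algorithm>
#include <vector>

// Hypothetical helper: true when expr_port is referenced by one of the loop ports.
// Mirrors the comparison used in LoopManager::get_loop_port_by_expr_port.
bool contains_expr_port(const std::vector<LoopPort>& ports, const ExpressionPort& expr_port) {
    return std::any_of(ports.cbegin(), ports.cend(), [&expr_port](const LoopPort& p) {
        return *p.expr_port == expr_port;
    });
}
```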
- if (!inner_loop_info->get_input_ports().empty() && !inner_loop_info->get_output_ports().empty()) { - loop_manager->sort_loop_ports(inner_loop_begin_pos, inner_loop_end_pos, inner_loop_id); - } -} - -std::set get_loop_input_exprs(const std::vector& loop_in_ports) { - std::set expr_set; - for (size_t i = 0; i < loop_in_ports.size(); ++i) { - expr_set.insert(loop_in_ports[i].expr_port->get_expr()); - } - return expr_set; -} - -bool extract_from_loop(const size_t& inner_loop_id, LinearIR& linear_ir) { - const auto& loop_manager = linear_ir.get_loop_manager(); - bool status = false; - bool continue_to_extract = true; - const auto& inner_loop_info = loop_manager->get_loop_info(inner_loop_id); - while (continue_to_extract) { - const auto& inner_loop_input_ports = inner_loop_info->get_input_ports(); - const auto& potential_extractable_exprs = get_loop_input_exprs(inner_loop_input_ports); - bool expr_extracted = false; - for (const auto& port_expr : potential_extractable_exprs) { - if (is_extraction_applicable(port_expr, inner_loop_info)) { - status = true; - LinearIR::constExprIt inner_loop_begin_pos, inner_loop_end_pos; - std::tie(inner_loop_begin_pos, inner_loop_end_pos) = - loop_manager->get_loop_bounds(linear_ir, inner_loop_id); - // extract scalar on inputs if there are - for (size_t i = 0; i < port_expr->get_input_count(); ++i) { - auto parent = port_expr->get_input_port_connector(i)->get_source().get_expr(); - if (ov::is_type(parent->get_node())) { - extract_expr(parent, linear_ir, inner_loop_begin_pos, inner_loop_end_pos); - } - } - extract_expr(port_expr, linear_ir, inner_loop_begin_pos, inner_loop_end_pos); - update_loop_ports(port_expr, loop_manager, inner_loop_id, inner_loop_begin_pos, inner_loop_end_pos); - expr_extracted = true; - break; // extracted and refreshed loop_input_ports. break potential_extractable_exprs loop, and go while() to start again. - } - } - if (inner_loop_input_ports.size() == 0 && inner_loop_info->get_output_ports().size() == 0) { - // If the loop becomes empty (inner_loop_input_ports is ref) after extraction, remove it from loop_manager - loop_manager->remove_loop_info(inner_loop_id); - break; - } - // no more extractable expr in this loop after go through all potential_extractable_exprs, done for this loop. 
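For review reference, the deleted `extract_from_loop` driver above boils down to a fixed-point iteration: hoist one applicable loop-input expression, rebuild the candidate set, and stop once a full sweep changes nothing. A condensed sketch under those assumptions (the helpers are hypothetical stand-ins for the removed functions):

```cpp
// Condensed control flow of the removed driver, not a drop-in replacement.
bool extract_from_loop(size_t loop_id, LinearIR& linear_ir) {
    bool status = false;
    bool changed = true;
    while (changed) {
        changed = false;
        // Candidates: expressions feeding the loop's input ports (hypothetical helper).
        for (const auto& expr : collect_loop_input_exprs(linear_ir, loop_id)) {
            if (is_extraction_applicable(expr, loop_id)) {
                // Move above the loop and update the loop ports (hypothetical helper).
                hoist_above_loop(expr, linear_ir, loop_id);
                status = changed = true;
                break;  // ports changed: recompute the candidate set
            }
        }
    }
    return status;
}
```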
- if (!expr_extracted) - continue_to_extract = false; - } - return status; -} -} // namespace - -bool ExtractLoopInvariants::run(LinearIR& linear_ir, lowered::LinearIR::constExprIt begin, lowered::LinearIR::constExprIt end) { - OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::ExtractLoopInvariants") - bool modified = false; - - const auto& loop_depth = linear_ir.get_config().m_loop_depth; - std::vector> loop_ids_need_extract(loop_depth); - const auto& loop_map = linear_ir.get_loop_manager()->get_map(); - for (const auto& loop : loop_map) { - const auto& loop_dim = loop.second->get_dim_idx(); - if (loop_dim != LoopInfo::UNDEFINED_DIM_IDX) { - OPENVINO_ASSERT(loop_dim < loop_depth, "dim_idx of loop should be smaller than loop_depth"); - loop_ids_need_extract[loop_dim].insert(loop.first); - } - } - // move invariant expr to top(outside) of current loop - for (size_t d = 0; d < loop_depth; d++) { - const auto& loops_in_this_depth = loop_ids_need_extract[d]; - for (const auto& loop_id : loops_in_this_depth) { - modified |= extract_from_loop(loop_id, linear_ir); - } - } - - return modified; -} - -} // namespace pass -} // namespace lowered -} // namespace snippets -} // namespace ov diff --git a/src/common/snippets/src/lowered/pass/init_loops.cpp b/src/common/snippets/src/lowered/pass/init_loops.cpp index 32c44729bc04c9..3b9aa3f5109f23 100644 --- a/src/common/snippets/src/lowered/pass/init_loops.cpp +++ b/src/common/snippets/src/lowered/pass/init_loops.cpp @@ -20,6 +20,17 @@ namespace pass { using MemoryAccess = ov::snippets::modifier::MemoryAccess; namespace { +inline int64_t get_stride(size_t dim, const VectorDims& shape) { + int64_t stride = 1; + for (size_t i = dim + 1; i < shape.size(); ++i) { + if (utils::is_dynamic_value(shape[i])) { + return utils::get_dynamic_value(); + } + stride *= static_cast(shape[i]); + } + return stride; +} + inline void init_is_incremented(LoopPort& port, size_t loop_id) { const auto& expr = port.expr_port->get_expr(); const auto& expr_loops = expr->get_loop_ids(); @@ -82,7 +93,7 @@ inline int64_t get_ptr_increment(const LoopPort& loop_port, size_t work_amount, if (utils::is_dynamic_value(shape[dim]) && port_count > 1) { return utils::get_dynamic_value(); } else if (!(shape[dim] == 1 && work_amount != 1)) { - return utils::get_stride(dim, shape); + return get_stride(dim, shape); } return 0; } diff --git a/src/common/snippets/src/lowered/pass/load_movebroadcast_to_broadcastload.cpp b/src/common/snippets/src/lowered/pass/load_movebroadcast_to_broadcastload.cpp index 27232f1605ea0e..5ead7be79fca83 100644 --- a/src/common/snippets/src/lowered/pass/load_movebroadcast_to_broadcastload.cpp +++ b/src/common/snippets/src/lowered/pass/load_movebroadcast_to_broadcastload.cpp @@ -31,6 +31,9 @@ bool LoadMoveBroadcastToBroadcastLoad::run(LinearIR& linear_ir, lowered::LinearI load_expr->get_input_port_descriptor(0)->get_shape() != load_expr->get_output_port_descriptor(0)->get_shape()) continue; + OPENVINO_ASSERT(expr->get_loop_ids() == load_expr->get_loop_ids(), + "The pair of Load and MoveBroadcast expressions must be in the same loops!"); + // Cannot rewrite Broadcast + Load if load has more than 1 user // or more than one input, or if Broadcast has several inputs const auto load_consumers_inputs = interm_connector->get_consumers(); @@ -47,10 +50,7 @@ bool LoadMoveBroadcastToBroadcastLoad::run(LinearIR& linear_ir, lowered::LinearI const auto& load_parent_node = load_expr->get_input_port_connector(0)->get_source().get_expr()->get_node(); const auto& 
outshape = move_broadcast->get_output_partial_shape(0); const auto broadcastload = std::make_shared(load_parent_node, *outshape.rbegin(), load->get_offset()); - // insert at position of load_expr. As BroadcastMove and Load will be removed, preserve expr_it. - expr_it = std::next(expr_it); - linear_ir.replace_with_node({ load_expr, expr }, broadcastload, load_expr->get_loop_ids(), linear_ir.find(load_expr)); - expr_it = std::prev(expr_it); + expr_it = linear_ir.replace_with_node({ load_expr, expr }, broadcastload); modified |= true; } } diff --git a/src/common/snippets/src/op/subgraph.cpp b/src/common/snippets/src/op/subgraph.cpp index 003db9a8527249..51e0e3b904ff50 100644 --- a/src/common/snippets/src/op/subgraph.cpp +++ b/src/common/snippets/src/op/subgraph.cpp @@ -54,7 +54,6 @@ #include "snippets/lowered/pass/normalize_loop_ids.hpp" #include "snippets/lowered/pass/validate_expanded_loops.hpp" #include "snippets/lowered/pass/set_load_store_scalar.hpp" -#include "snippets/lowered/pass/extract_loop_invariants.hpp" #include "transformations/utils/utils.hpp" @@ -460,7 +459,6 @@ void Subgraph::control_flow_transformations(size_t min_parallel_work_amount, siz pipeline.register_pass(); pipeline.register_pass(); pipeline.register_pass(); - pipeline.register_pass(); pipeline.register_pass(); pipeline.register_pass(); pipeline.register_pass(); diff --git a/src/common/snippets/src/utils.cpp b/src/common/snippets/src/utils.cpp index 2204d00e181fb0..c2a49b8e0a4d41 100644 --- a/src/common/snippets/src/utils.cpp +++ b/src/common/snippets/src/utils.cpp @@ -142,18 +142,6 @@ size_t get_dim_idx(const lowered::ExpressionPort& port, size_t dim_idx) { return 0; } -int64_t get_stride(size_t dim_idx, const VectorDims& shape) { - OPENVINO_ASSERT(dim_idx < shape.size(), "dim_idx should be in range of [0, shape.size()) in get_stride"); - int64_t stride = 1; - for (size_t i = dim_idx + 1; i < shape.size(); ++i) { - if (utils::is_dynamic_value(shape[i])) { - return utils::get_dynamic_value(); - } - stride *= static_cast(shape[i]); - } - return stride; -} - ov::PartialShape get_planar_pshape(const ov::PartialShape& shape, const std::vector& order) { return get_pshape(shape, order, true); } diff --git a/src/common/snippets/tests/include/lir_test_utils.hpp b/src/common/snippets/tests/include/lir_test_utils.hpp index 2f687f6e1412d1..62ff366eb54c9b 100644 --- a/src/common/snippets/tests/include/lir_test_utils.hpp +++ b/src/common/snippets/tests/include/lir_test_utils.hpp @@ -58,24 +58,6 @@ void create_and_add_unified_loop_info(const std::shared_ptr& entries, const std::vector& exits, bool add_default_handlers = true); -/** - * @brief Creates unified loop info based on provided entry and exit points, and adds it to the linear_ir's loops map. - * Meanwhile set loop id to expr range [loop_begin_pos, loop_end_pos). 
- * @attention This helper wraps LoopManager::mark_loop method, which also marks expressions with the corresponding loop info - * @param linear_ir linear_ir in which loop info should be added - * @param loop_begin_pos begin expr postion in this loop - * @param loop_end_pos end expr postion in this loop - * @param entries entry points of loop - * @param exits exit points of loop - */ -void create_and_add_unified_loop_info(const std::shared_ptr& linear_ir, - ov::snippets::lowered::LinearIR::constExprIt loop_begin_pos, - ov::snippets::lowered::LinearIR::constExprIt loop_end_pos, - size_t work_amount, - size_t increment, - const std::vector& entries, - const std::vector& exits, - bool add_default_handlers = true); } // namespace snippets } // namespace test } // namespace ov diff --git a/src/common/snippets/tests/src/lir_comparator.cpp b/src/common/snippets/tests/src/lir_comparator.cpp index 3552c81cdf40a5..ed20853a0ea425 100644 --- a/src/common/snippets/tests/src/lir_comparator.cpp +++ b/src/common/snippets/tests/src/lir_comparator.cpp @@ -96,9 +96,8 @@ LIRComparator::Result LIRComparator::compare(const LinearIRPtr& linear_ir, for (auto result_it = results.begin(), result_it_ref = results_ref.begin(); result_it != results.end(); ++result_it, ++result_it_ref) PROPAGATE_ERROR("", run_comparison(result_it, result_it_ref)); - if (should_compare(LIRCmpValues::LOOP_MANAGER)) { + if (should_compare(LIRCmpValues::LOOP_MANAGER)) PROPAGATE_ERROR("Loop managers", compare_loop_managers(linear_ir->get_loop_manager(), linear_ir_ref->get_loop_manager())); - } return Result::ok(); } diff --git a/src/common/snippets/tests/src/lir_test_utils.cpp b/src/common/snippets/tests/src/lir_test_utils.cpp index 71cfadc9c3a442..31a2092acc3020 100644 --- a/src/common/snippets/tests/src/lir_test_utils.cpp +++ b/src/common/snippets/tests/src/lir_test_utils.cpp @@ -91,20 +91,9 @@ void create_and_add_unified_loop_info(const LinearIRPtr& linear_ir, const std::vector& entries, const std::vector& exits, bool set_default_handlers) { - // Equal begin and end iterators are set to avoid expressions marking with new loop id - create_and_add_unified_loop_info(linear_ir, linear_ir->begin(), linear_ir->begin(), work_amount, increment, entries, exits, set_default_handlers); -} - -void create_and_add_unified_loop_info(const LinearIRPtr& linear_ir, - ov::snippets::lowered::LinearIR::constExprIt loop_begin_pos, - ov::snippets::lowered::LinearIR::constExprIt loop_end_pos, - size_t work_amount, - size_t increment, - const std::vector& entries, - const std::vector& exits, - bool set_default_handlers) { const auto& loop_manager = linear_ir->get_loop_manager(); - loop_manager->mark_loop(loop_begin_pos, loop_end_pos, work_amount, increment, entries, exits, set_default_handlers); + // Equal begin and end iterators are set to avoid expressions marking with new loop id + loop_manager->mark_loop(linear_ir->begin(), linear_ir->begin(), work_amount, increment, entries, exits, set_default_handlers); } } // namespace snippets diff --git a/src/common/snippets/tests/src/lowered/pass/extracted_loop_invariants.cpp b/src/common/snippets/tests/src/lowered/pass/extracted_loop_invariants.cpp deleted file mode 100644 index c3f4f5ea7f6877..00000000000000 --- a/src/common/snippets/tests/src/lowered/pass/extracted_loop_invariants.cpp +++ /dev/null @@ -1,423 +0,0 @@ -// Copyright (C) 2024 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "lir_test_utils.hpp" - -#include "openvino/opsets/opset10.hpp" -#include 
"snippets/lowered/pass/extract_loop_invariants.hpp" -#include "snippets/lowered/pass/normalize_loop_ids.hpp" -#include "snippets/op/broadcastmove.hpp" -#include "snippets/op/scalar.hpp" -#include "snippets/op/vector_buffer.hpp" -#include "snippets/op/horizon_max.hpp" -#include "snippets/op/horizon_sum.hpp" -#include "snippets/op/powerstatic.hpp" - -namespace ov { -namespace test { -namespace snippets { - -using namespace ov::snippets::lowered; -using namespace ov::snippets::lowered::pass; - -class ExtractLoopInvariantsTest : public LoweredPassTestsF { -public: - ExtractLoopInvariantsTest() : LoweredPassTestsF() { - comparator.enable(LIRComparator::LIRCmpValues::LOOP_INDICES); - comparator.enable(LIRComparator::LIRCmpValues::PORT_DESCRIPTORS); - comparator.enable(LIRComparator::LIRCmpValues::PORT_CONNECTORS); - comparator.enable(LIRComparator::LIRCmpValues::LOOP_MANAGER); - } - - void SetUp() override { - pipeline.register_pass(); - } -}; - -TEST_F(ExtractLoopInvariantsTest, ExtractedLoopInvariantsWithParams) { - size_t vector_size = 16; - const auto input_precision = ov::element::f32; - const ov::Shape input_shape0{1}; - const ov::Shape input_shape1{512}; - const std::vector layout_1d{{0}, {0}, {0}}; - const std::vector mul_subtensor{{1}, {1}, {1}}; - const std::vector sub_subtensor{{512}, {1}, {512}}; - /* - * Param00 Param01 - * \ / - * Multiply(loopBegin) - * | - * Broadcast Param1 - * \ / - * Substract(loopBeginRef) - * | - * Result(LoopEnd and LoopEndRef) - */ - { - auto param00 = linear_ir->push_node(input_precision, input_shape0); - auto param01 = linear_ir->push_node(input_precision, input_shape0); - auto param1 = linear_ir->push_node(input_precision, input_shape1); - auto multiply = linear_ir->push_node(param00.second, param01.second); - init_expr_descriptors(*multiply.first, mul_subtensor, layout_1d); - auto broadcastmove = linear_ir->push_node(multiply.second, 512); - auto sub = linear_ir->push_node(param1.second, broadcastmove.second); - init_expr_descriptors(*sub.first, sub_subtensor, layout_1d); - auto result = linear_ir->push_node(sub.second); - auto begin = multiply.first; - auto end = result.first; - create_and_add_unified_loop_info(linear_ir, begin, end, 512, vector_size, - {LoopPort((*multiply.first)->get_input_port(0)), - LoopPort((*multiply.first)->get_input_port(1)), - LoopPort((*sub.first)->get_input_port(0))}, - {LoopPort((*sub.first)->get_output_port(0))}); - linear_ir->set_loop_depth(1); - } - { - auto param00 = linear_ir_ref->push_node(input_precision, input_shape0); - auto param01 = linear_ir_ref->push_node(input_precision, input_shape0); - auto param1 = linear_ir_ref->push_node(input_precision, input_shape1); - auto multiply = linear_ir_ref->push_node(param00.second, param01.second); - init_expr_descriptors(*multiply.first, mul_subtensor, layout_1d); - auto broadcastmove = linear_ir_ref->push_node(multiply.second, 512); - auto sub = linear_ir_ref->push_node(param1.second, broadcastmove.second); - init_expr_descriptors(*sub.first, sub_subtensor, layout_1d); - auto result = linear_ir_ref->push_node(sub.second); - auto begin = sub.first; - auto end = result.first; - create_and_add_unified_loop_info(linear_ir_ref, begin, end, 512, vector_size, - {LoopPort((*sub.first)->get_input_port(0)), - LoopPort((*sub.first)->get_input_port(1))}, - {LoopPort((*sub.first)->get_output_port(0))}); - } -} - -TEST_F(ExtractLoopInvariantsTest, ExtractedLoopInvariantsWithScalar) { - size_t vector_size = 16; - const auto input_precision = ov::element::f32; - const ov::Shape 
scalar_shape{1}; - const ov::Shape input_shape0{1}; - const ov::Shape input_shape1{512}; - const std::vector layout_1d{{0}, {0}, {0}}; - const std::vector mul_subtensor{{1}, {1}, {1}}; - const std::vector sub_subtensor{{512}, {1}, {512}}; - /* - * Param0 Scalar(loopBegin) - * \ / - * Multiply - * | - * Broadcast Param1 - * \ / - * Substract(loopBeginRef) - * | - * Result(LoopEnd and LoopEndRef) - */ - { - auto param0 = linear_ir->push_node(input_precision, input_shape0); - auto param1 = linear_ir->push_node(input_precision, input_shape1); - auto scalar = linear_ir->push_node(input_precision, scalar_shape, 3.8f); - auto multiply = linear_ir->push_node(param0.second, scalar.second); - init_expr_descriptors(*multiply.first, mul_subtensor, layout_1d); - auto broadcastmove = linear_ir->push_node(multiply.second, 512); - auto sub = linear_ir->push_node(param1.second, broadcastmove.second); - init_expr_descriptors(*sub.first, sub_subtensor, layout_1d); - auto result = linear_ir->push_node(sub.second); - auto begin = scalar.first; - auto end = result.first; - create_and_add_unified_loop_info(linear_ir, begin, end, 512, vector_size, - {LoopPort((*multiply.first)->get_input_port(0)), - LoopPort((*sub.first)->get_input_port(0))}, - {LoopPort((*sub.first)->get_output_port(0))}); - linear_ir->set_loop_depth(1); - } - { - auto param0 = linear_ir_ref->push_node(input_precision, input_shape0); - auto param1 = linear_ir_ref->push_node(input_precision, input_shape1); - auto scalar = linear_ir_ref->push_node(input_precision, scalar_shape, 3.8f); - auto multiply = linear_ir_ref->push_node(param0.second, scalar.second); - init_expr_descriptors(*multiply.first, mul_subtensor, layout_1d); - auto broadcastmove = linear_ir_ref->push_node(multiply.second, 512); - auto sub = linear_ir_ref->push_node(param1.second, broadcastmove.second); - init_expr_descriptors(*sub.first, sub_subtensor, layout_1d); - auto result = linear_ir_ref->push_node(sub.second); - auto begin = sub.first; - auto end = result.first; - create_and_add_unified_loop_info(linear_ir_ref, begin, end, 512, vector_size, - {LoopPort((*sub.first)->get_input_port(0)), - LoopPort((*sub.first)->get_input_port(1))}, - {LoopPort((*sub.first)->get_output_port(0))}); - } -} - -TEST_F(ExtractLoopInvariantsTest, ExtractedLoopInvariantsOutputLoopUpdateNotNeed) { - size_t vector_size = 16; - const auto input_precision = ov::element::f32; - const ov::Shape input_shape_a{3, 1}; - const ov::Shape input_shape_b{3, 16}; - const std::vector layout{{0, 1}, {0, 1}, {0, 1}}; - const std::vector subtensor_mul{{3, 1}, {3, 1}, {3, 1}}; - const std::vector subtensor_add{{3, 16}, {3, 16}, {3, 16}}; - /* - * Before: Param0, Param1, Param2, [[Multiply, Broadcast, Add, Sub]], Result0, Result1 - * After: Param0, Param1, Param2, [Multiply, Broadcast, [Add, Sub]], Result0, Result1 - * Param0(3,1) Param1(3,1) - * \ / - * Multiply - * | - * Broadcast Param2(3,16) - * \ / - * Add --- Result0 - * | - * Param3(3,16) --- Sub - * | - * Result1 - */ - { - auto param0 = linear_ir->push_node(input_precision, input_shape_a); - auto param1 = linear_ir->push_node(input_precision, input_shape_a); - auto param2 = linear_ir->push_node(input_precision, input_shape_b); - auto param3 = linear_ir->push_node(input_precision, input_shape_b); - auto multiply = linear_ir->push_node(param0.second, param1.second); - init_expr_descriptors(*multiply.first, subtensor_mul, layout); - auto broadcastmove = linear_ir->push_node(multiply.second, 16); - auto add = linear_ir->push_node(param2.second, 
broadcastmove.second); - init_expr_descriptors(*add.first, subtensor_add, layout); - auto sub = linear_ir->push_node(param3.second, add.second); - auto result0 = linear_ir->push_node(add.second); - auto result1 = linear_ir->push_node(sub.second); - auto begin = multiply.first; - auto end = result1.first; - create_and_add_unified_loop_info(linear_ir, begin, end, 16, vector_size, - {LoopPort((*multiply.first)->get_input_port(0), true, 0), - LoopPort((*multiply.first)->get_input_port(1), true, 0), - LoopPort((*add.first)->get_input_port(0), true, 0), - LoopPort((*sub.first)->get_input_port(0), true, 0)}, - {LoopPort((*add.first)->get_output_port(0), true, 0), - LoopPort((*sub.first)->get_output_port(0), true, 0)}); - create_and_add_unified_loop_info(linear_ir, begin, end, 3, 1, - {LoopPort((*multiply.first)->get_input_port(0), true, 1), - LoopPort((*multiply.first)->get_input_port(1), true, 1), - LoopPort((*add.first)->get_input_port(0), true, 1), - LoopPort((*sub.first)->get_input_port(0), true, 1)}, - {LoopPort((*add.first)->get_output_port(0), true, 1), - LoopPort((*sub.first)->get_output_port(0), true, 1)}); - linear_ir->set_loop_depth(2); - } - { - auto param0 = linear_ir_ref->push_node(input_precision, input_shape_a); - auto param1 = linear_ir_ref->push_node(input_precision, input_shape_a); - auto param2 = linear_ir_ref->push_node(input_precision, input_shape_b); - auto param3 = linear_ir_ref->push_node(input_precision, input_shape_b); - auto multiply = linear_ir_ref->push_node(param0.second, param1.second); - init_expr_descriptors(*multiply.first, subtensor_mul, layout); - auto broadcastmove = linear_ir_ref->push_node(multiply.second, 16); - auto add = linear_ir_ref->push_node(param2.second, broadcastmove.second); - init_expr_descriptors(*add.first, subtensor_add, layout); - auto sub = linear_ir_ref->push_node(param3.second, add.second); - auto result0 = linear_ir_ref->push_node(add.second); - auto result1 = linear_ir_ref->push_node(sub.second); - auto begin_inner = add.first; - auto end_inner = result1.first; - create_and_add_unified_loop_info(linear_ir_ref, begin_inner, end_inner, 16, vector_size, - {LoopPort((*add.first)->get_input_port(0), true, 0), - LoopPort((*add.first)->get_input_port(1), true, 0), - LoopPort((*sub.first)->get_input_port(0), true, 0)}, - {LoopPort((*add.first)->get_output_port(0), true, 0), - LoopPort((*sub.first)->get_output_port(0), true, 0)}); - auto begin_outer = multiply.first; - auto end_outer = result1.first; - create_and_add_unified_loop_info(linear_ir_ref, begin_outer, end_outer, 3, 1, - {LoopPort((*multiply.first)->get_input_port(0), true, 1), - LoopPort((*multiply.first)->get_input_port(1), true, 1), - LoopPort((*add.first)->get_input_port(0), true, 1), - LoopPort((*sub.first)->get_input_port(0), true, 1)}, - {LoopPort((*add.first)->get_output_port(0), true, 1), - LoopPort((*sub.first)->get_output_port(0), true, 1)}); - } -} - -TEST_F(ExtractLoopInvariantsTest, ExtractedLoopInvariantsFromInnermostToLoopOutside) { - size_t vector_size = 16; - const auto input_precision = ov::element::f32; - const ov::Shape input_shape_0{3, 512}; - const ov::Shape input_shape_1{1, 1}; - ov::snippets::VectorDims layout{0, 1}; - ov::snippets::VectorDims subtensor{3, 512}; - /* - * before: Param0, Param1, [[Broadcast, Add]], Result - * intermediate: Param0, Param1, [Broadcast, [Add]], Result - * after: Param0, Param1, Broadcast, [[Add]], Result - * Param0(3,512) Param1(1,1) - * \ / - * \ Broadcast - * \ / - * Add - * | - * Result - */ - { - auto param_0 = 
linear_ir->push_node(input_precision, input_shape_0); - auto param_1 = linear_ir->push_node(input_precision, input_shape_1); - auto broadcastmove = linear_ir->push_node(param_1.second, 512); - init_expr_descriptors(*broadcastmove.first, {{1, 1}, subtensor}, {layout, layout}); - auto add = linear_ir->push_node(param_0.second, broadcastmove.second); - init_expr_descriptors(*add.first, {subtensor, subtensor, subtensor}, {layout, layout, layout}); - auto result = linear_ir->push_node(add.second); - create_and_add_unified_loop_info(linear_ir, broadcastmove.first, result.first, 3, 1, - {LoopPort((*broadcastmove.first)->get_input_port(0), true, 1), - LoopPort((*add.first)->get_input_port(0), true, 1)}, - {LoopPort((*add.first)->get_output_port(0), true, 1)}); - create_and_add_unified_loop_info(linear_ir, broadcastmove.first, result.first, 512, vector_size, - {LoopPort((*broadcastmove.first)->get_input_port(0), true, 0), - LoopPort((*add.first)->get_input_port(0), true, 0)}, - {LoopPort((*add.first)->get_output_port(0), true, 0)}); - linear_ir->set_loop_depth(2); - } - { - auto param_0 = linear_ir_ref->push_node(input_precision, input_shape_0); - auto param_1 = linear_ir_ref->push_node(input_precision, input_shape_1); - auto broadcastmove = linear_ir_ref->push_node(param_1.second, 512); - init_expr_descriptors(*broadcastmove.first, {{1, 1}, subtensor}, {layout, layout}); - auto add = linear_ir_ref->push_node(param_0.second, broadcastmove.second); - init_expr_descriptors(*add.first, {subtensor, subtensor, subtensor}, {layout, layout, layout}); - auto result = linear_ir_ref->push_node(add.second); - create_and_add_unified_loop_info(linear_ir_ref, add.first, result.first, 3, 1, - {LoopPort((*add.first)->get_input_port(0), true, 1), - LoopPort((*add.first)->get_input_port(1), true, 1)}, - {LoopPort((*add.first)->get_output_port(0), true, 1)}); - create_and_add_unified_loop_info(linear_ir_ref, add.first, result.first, 512, vector_size, - {LoopPort((*add.first)->get_input_port(0), true, 0), - LoopPort((*add.first)->get_input_port(1), true, 0)}, - {LoopPort((*add.first)->get_output_port(0), true, 0)}); - } -} - -class ExtractLoopInvariantsRemoveLoopsTest : public LoweredPassTestsF { -public: - ExtractLoopInvariantsRemoveLoopsTest() : LoweredPassTestsF() { - comparator.enable(LIRComparator::LIRCmpValues::LOOP_INDICES); - comparator.enable(LIRComparator::LIRCmpValues::PORT_DESCRIPTORS); - comparator.enable(LIRComparator::LIRCmpValues::PORT_CONNECTORS); - comparator.enable(LIRComparator::LIRCmpValues::LOOP_MANAGER); - } - - void SetUp() override { - pipeline.register_pass(); - pipeline.register_pass(); // loop could be removed and loop index could be different, normalize it - } -}; - -// softmax with shape of 1 for innermost dimension. -// Cover multiple(all) exprs are extracted, and inner loops are removed. 
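The deleted tests that follow were the main users of the `create_and_add_unified_loop_info` overload removed from lir_test_utils above. The difference between the two overloads, as a usage sketch (signatures taken from this patch):

```cpp
// Surviving overload: only registers the loop info; equal iterators are passed
// to LoopManager::mark_loop internally, so no expression is marked.
create_and_add_unified_loop_info(linear_ir, work_amount, increment, entries, exits);

// Removed overload, used by the tests below: additionally marked every
// expression in [loop_begin_pos, loop_end_pos) with the new loop id.
create_and_add_unified_loop_info(linear_ir, loop_begin_pos, loop_end_pos,
                                 work_amount, increment, entries, exits);
```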
-TEST_F(ExtractLoopInvariantsRemoveLoopsTest, ExtractedLoopInvariantsAllExprsInLoopExtracted) { - size_t vector_size = 16; - const auto input_precision = ov::element::f32; - const ov::Shape input_shape{10, 1}; - ov::snippets::VectorDims layout{0, 1}; - ov::snippets::VectorDims subtensor{10, 1}; - /* - * Param Vector - * | | | - * | Maximum - * | | - * | HorizonMax - * | | - * Sub - * | - * Vector Exp - * | | | - * Add | - * | | - * HAdd | - * | | - * Power | - * | | - * Multiply - * | - * Result - */ - { - auto param = linear_ir->push_node(input_precision, input_shape); - auto vector_max = linear_ir->push_node(input_precision); - auto vector_sum = linear_ir->push_node(input_precision); - auto max = linear_ir->push_node(param.second, vector_max.second); - init_expr_descriptors(*max.first, {{subtensor}, {1}, {subtensor}}, {layout, {0}, layout}); - auto hmax = linear_ir->push_node(max.second); - auto sub = linear_ir->push_node(param.second, hmax.second); - init_expr_descriptors(*sub.first, {subtensor, subtensor, subtensor}, {layout, layout, layout}); - auto exp = linear_ir->push_node(sub.second); - init_expr_descriptors(*exp.first, {subtensor, subtensor}, {layout, layout}); - auto add = linear_ir->push_node(exp.second, vector_sum.second); - init_expr_descriptors(*add.first, {subtensor, {1}, subtensor}, {layout, {0}, layout}); - auto hsum = linear_ir->push_node(add.second); - auto power_static = linear_ir->push_node(hsum.second, -1.0f); - init_expr_descriptors(*power_static.first, {subtensor, subtensor}, {layout, layout}); - auto multiply = linear_ir->push_node(exp.second, power_static.second); - init_expr_descriptors(*multiply.first, {subtensor, subtensor, subtensor}, {layout, layout, layout}); - auto result = linear_ir->push_node(multiply.second); - // 3 inner loop - create_and_add_unified_loop_info(linear_ir, max.first, hmax.first, 1, vector_size, - {LoopPort((*max.first)->get_input_port(0), true, 0), - LoopPort((*max.first)->get_input_port(1), true, 0)}, - {LoopPort((*max.first)->get_output_port(0), true, 0)}); - create_and_add_unified_loop_info(linear_ir, sub.first, hsum.first, 1, vector_size, - {LoopPort((*sub.first)->get_input_port(0), true, 0), - LoopPort((*sub.first)->get_input_port(1), true, 0), - LoopPort((*add.first)->get_input_port(1), true, 0)}, - {LoopPort((*exp.first)->get_output_port(0), true, 0), - LoopPort((*add.first)->get_output_port(0), true, 0)}); - create_and_add_unified_loop_info(linear_ir, multiply.first, result.first, 1, vector_size, - {LoopPort((*multiply.first)->get_input_port(0), true, 0), - LoopPort((*multiply.first)->get_input_port(1), true, 0)}, - {LoopPort((*multiply.first)->get_output_port(0), true, 0)}); - // outer loop info - const auto loop_begin = std::make_shared(); - auto loop_begin_expr = linear_ir->insert_node(loop_begin, std::vector{}, {}, false, max.first); - const auto loop_end = std::make_shared(); - std::vector loop_end_inputs{(*loop_begin_expr)->get_output_port_connector(0)}; - auto loop_end_expr = linear_ir->insert_node(loop_end, loop_end_inputs, {}, false, result.first); - create_and_add_unified_loop_info(linear_ir, loop_begin_expr, result.first, 10, 1, - {LoopPort((*max.first)->get_input_port(0), true, 1), - LoopPort((*max.first)->get_input_port(1), true, 0), - LoopPort((*add.first)->get_input_port(1), true, 0)}, - {LoopPort((*multiply.first)->get_output_port(0), true, 1)}); - loop_end->set_id((*loop_end_expr)->get_loop_ids().back()); - linear_ir->set_loop_depth(2); - } - { - auto param = linear_ir_ref->push_node(input_precision, 
input_shape); - auto vector_max = linear_ir_ref->push_node(input_precision); - auto vector_sum = linear_ir_ref->push_node(input_precision); - auto max = linear_ir_ref->push_node(param.second, vector_max.second); - init_expr_descriptors(*max.first, {{subtensor}, {1}, {subtensor}}, {layout, {0}, layout}); - auto hmax = linear_ir_ref->push_node(max.second); - auto sub = linear_ir_ref->push_node(param.second, hmax.second); - init_expr_descriptors(*sub.first, {subtensor, subtensor, subtensor}, {layout, layout, layout}); - auto exp = linear_ir_ref->push_node(sub.second); - init_expr_descriptors(*exp.first, {subtensor, subtensor}, {layout, layout}); - auto add = linear_ir_ref->push_node(exp.second, vector_sum.second); - init_expr_descriptors(*add.first, {subtensor, {1}, subtensor}, {layout, {0}, layout}); - auto hsum = linear_ir_ref->push_node(add.second); - auto power_static = linear_ir_ref->push_node(hsum.second, -1.0f); - init_expr_descriptors(*power_static.first, {subtensor, subtensor}, {layout, layout}); - auto multiply = linear_ir_ref->push_node(exp.second, power_static.second); - init_expr_descriptors(*multiply.first, {subtensor, subtensor, subtensor}, {layout, layout, layout}); - auto result = linear_ir_ref->push_node(multiply.second); - // outer loop - const auto loop_begin = std::make_shared(); - auto loop_begin_expr = linear_ir_ref->insert_node(loop_begin, std::vector{}, {}, false, max.first); - const auto loop_end = std::make_shared(); - std::vector loop_end_inputs{(*loop_begin_expr)->get_output_port_connector(0)}; - auto loop_end_expr = linear_ir_ref->insert_node(loop_end, loop_end_inputs, {}, false, result.first); - create_and_add_unified_loop_info(linear_ir_ref, loop_begin_expr, result.first, 10, 1, - {LoopPort((*max.first)->get_input_port(0), true, 1), - LoopPort((*max.first)->get_input_port(1), true, 0), - LoopPort((*add.first)->get_input_port(1), true, 0)}, - {LoopPort((*multiply.first)->get_output_port(0), true, 1)}); - loop_end->set_id((*loop_end_expr)->get_loop_ids().back()); - } -} - -} // namespace snippets -} // namespace test -} // namespace ov diff --git a/src/core/include/openvino/op/roi_align_rotated.hpp b/src/core/include/openvino/op/roi_align_rotated.hpp index 5e7bf95fa68bcc..80366e7b9878ef 100644 --- a/src/core/include/openvino/op/roi_align_rotated.hpp +++ b/src/core/include/openvino/op/roi_align_rotated.hpp @@ -55,7 +55,7 @@ class OPENVINO_API ROIAlignRotated : public util::ROIAlignBase { } private: - bool m_clockwise_mode; + bool m_clockwise_mode{}; }; } // namespace v15 } // namespace op diff --git a/src/core/src/op/assign.cpp b/src/core/src/op/assign.cpp index da79244127c317..bf6e55c11b1d39 100644 --- a/src/core/src/op/assign.cpp +++ b/src/core/src/op/assign.cpp @@ -29,7 +29,7 @@ void Assign::validate_and_infer_types() { for (const auto& input : inputs()) { start_nodes.push_back(input.get_source_output().get_node_shared_ptr()); } - auto nodes = topological_sort(start_nodes); + auto nodes = topological_sort(std::move(start_nodes)); for (const auto& node : nodes) { if (auto read_value = ov::as_type_ptr(node)) { if (read_value->get_variable_id() == m_variable_id) @@ -109,15 +109,15 @@ bool Assign::evaluate(TensorVector& outputs, auto& variable_context = const_cast(found_context->second.as()); - const auto& variable_values = variable_context.get_variable_values(); + auto var_value = variable_context.get_variable_value(m_variable); // automatically allocate memory if not provided by user - if (variable_values.find(m_variable) == variable_values.end()) { + if 
(!var_value) { auto tensor = Tensor(m_variable->get_info().data_type, m_variable->get_info().data_shape.to_shape()); - variable_context.set_variable_value(m_variable, std::make_shared(tensor)); + var_value = std::make_shared(tensor); + variable_context.set_variable_value(m_variable, var_value); } - const auto var_value = variable_values.find(m_variable)->second; var_value->set_reset(false); auto memory_buffer = var_value->get_state(); memory_buffer.set_shape(inputs[0].get_shape()); diff --git a/src/core/src/op/concat.cpp b/src/core/src/op/concat.cpp index de24245fa28458..607848b0ef0555 100644 --- a/src/core/src/op/concat.cpp +++ b/src/core/src/op/concat.cpp @@ -39,9 +39,9 @@ void Concat::validate_and_infer_types() { input_shapes.push_back(get_input_partial_shape(i)); } - const auto output_shape = shape_infer(this, input_shapes).front(); + const auto output_shapes = shape_infer(this, input_shapes); - set_output_type(0, inputs_et, output_shape); + set_output_type(0, inputs_et, output_shapes[0]); } std::shared_ptr Concat::clone_with_new_inputs(const OutputVector& new_args) const { diff --git a/src/core/src/op/convert.cpp b/src/core/src/op/convert.cpp index 70f47d6ee67556..8dd0675fe6a6ed 100644 --- a/src/core/src/op/convert.cpp +++ b/src/core/src/op/convert.cpp @@ -162,8 +162,8 @@ bool evaluate_bound(const Node* const node, TensorVector& output_values, const T return false; // dynamic values translation - auto input_dynamic_mask = Tensor(element::boolean, in_bound_shape); - auto outputs = TensorVector{input_dynamic_mask}; + auto outputs = TensorVector{{element::boolean, in_bound_shape}}; + const auto& input_dynamic_mask = outputs[0]; return v1::Equal().evaluate(outputs, {input_bound, input_max}) && v1::Select().evaluate(output_values, {input_dynamic_mask, output_max, output_values[0]}); diff --git a/src/core/src/op/eye.cpp b/src/core/src/op/eye.cpp index 1230ce9e4d8f9f..7d4505d52816bc 100644 --- a/src/core/src/op/eye.cpp +++ b/src/core/src/op/eye.cpp @@ -57,8 +57,8 @@ void Eye::validate_and_infer_types() { input_et); } - const auto output_shape = shape_infer(this, ov::util::get_node_input_partial_shapes(*this)).front(); - set_output_type(0, get_out_type(), output_shape); + const auto output_shapes = shape_infer(this, ov::util::get_node_input_partial_shapes(*this)); + set_output_type(0, get_out_type(), output_shapes[0]); } bool Eye::visit_attributes(ov::AttributeVisitor& visitor) { diff --git a/src/core/src/op/if.cpp b/src/core/src/op/if.cpp index cf393e06aba859..d8ac09096e714f 100644 --- a/src/core/src/op/if.cpp +++ b/src/core/src/op/if.cpp @@ -40,9 +40,9 @@ static ov::PartialShape resolve_shape(const ov::PartialShape& then_pshape, const return ov::PartialShape::dynamic(); } } - std::vector new_dims; + ov::PartialShape new_dims; - // If rangs are equal each dimesion of then_body output is union with each dimension of + // If ranges are equal each dimension of then_body output is union with each dimension of // else_body for (auto then_it = then_pshape.cbegin(), else_it = else_pshape.cbegin(); then_it != then_pshape.cend(); then_it++, else_it++) { @@ -57,7 +57,7 @@ static ov::PartialShape resolve_shape(const ov::PartialShape& then_pshape, const } } - return ov::PartialShape(new_dims); + return new_dims; } bool ov::op::v8::If::visit_attributes(AttributeVisitor& visitor) { diff --git a/src/core/src/op/loop.cpp b/src/core/src/op/loop.cpp index 2d2ed726cea339..8c734e894a8e35 100644 --- a/src/core/src/op/loop.cpp +++ b/src/core/src/op/loop.cpp @@ -179,8 +179,8 @@ void 
Loop::validate_and_infer_types() { as_type_ptr(input_description)) { auto body_parameter = m_bodies[0]->get_parameters().at(invariant_input_description->m_body_parameter_index); - auto input_partial_shape = input(index).get_partial_shape(); - auto input_type = input(index).get_element_type(); + const auto& input_partial_shape = input(index).get_partial_shape(); + const auto& input_type = input(index).get_element_type(); body_parameter->set_partial_shape(input_partial_shape); body_parameter->set_element_type(input_type); diff --git a/src/core/src/op/max_pool.cpp b/src/core/src/op/max_pool.cpp index 78df2bf78b8664..741f57105efb4d 100644 --- a/src/core/src/op/max_pool.cpp +++ b/src/core/src/op/max_pool.cpp @@ -204,9 +204,9 @@ bool MaxPool::evaluate(TensorVector& outputs, const TensorVector& inputs) const const auto input_shapes = std::vector{inputs[0].get_shape()}; auto pads_begin = m_pads_begin; auto pads_end = m_pads_end; - const auto output_shape = shape_infer(this, input_shapes, pads_begin, pads_end).front(); + const auto output_shapes = shape_infer(this, input_shapes, pads_begin, pads_end); - outputs[0].set_shape(output_shape.get_shape()); + outputs[0].set_shape(output_shapes[0].get_shape()); using namespace ov::element; return IF_TYPE_OF_CONVERT_TENSORS(v1_MaxPool_evaluate, this, diff --git a/src/core/src/op/mod.cpp b/src/core/src/op/mod.cpp index e741ddfe8b9efd..1d15ffb60d4971 100644 --- a/src/core/src/op/mod.cpp +++ b/src/core/src/op/mod.cpp @@ -110,8 +110,8 @@ Tensor evaluate_undefined_result_mask(const TensorVector& bounds) { const auto& in_et = bounds.front().get_element_type(); - auto zero_t = ov::util::make_tensor_of_value(in_et, 0); - auto max_t = ov::util::make_tensor_of_max_value(in_et); + const auto zero_t = ov::util::make_tensor_of_value(in_et, 0); + const auto max_t = ov::util::make_tensor_of_max_value(in_et); const auto& v_ub = bounds[1]; const auto& m_lb = bounds[2]; @@ -159,7 +159,7 @@ TensorVector get_bounds_with_valid_values(const TensorVector& bounds, const Tens auto m_bounds = TensorVector(); m_bounds.reserve(bounds.size()); - std::transform(bounds.cbegin(), bounds.cend(), std::back_inserter(m_bounds), [&](const Tensor& b) { + std::transform(bounds.cbegin(), bounds.cend(), std::back_inserter(m_bounds), [&](const Tensor& b) -> ov::Tensor { auto tmp = TensorVector{{b.get_element_type(), mask.get_shape()}}; return select_op.evaluate(tmp, {mask, one_t, b}) ? tmp.front() : Tensor{}; }); @@ -205,7 +205,7 @@ bool evaluate_bound(const Node* const op, TensorVector& outputs, bool is_lower) } // Set undefined bound value for results which cannot be calculated. const auto select_op = v1::Select(); - const auto undefined_bound = + const auto& undefined_bound = is_lower ? 
ov::util::make_tensor_of_value(in_et, 0) : ov::util::make_tensor_of_max_value(in_et); return select_op.evaluate(outputs, {undefined_result_mask, undefined_bound, outputs.front()}); } else { diff --git a/src/core/src/op/non_zero.cpp b/src/core/src/op/non_zero.cpp index 7f8169354b70c0..83b98f19a60d4b 100644 --- a/src/core/src/op/non_zero.cpp +++ b/src/core/src/op/non_zero.cpp @@ -80,22 +80,18 @@ void NonZero::validate_and_infer_types() { if (input_shape.rank().compatible(0)) { set_output_type(0, m_output_type, PartialShape::dynamic(2)); } else { - auto dim = Dimension{0, 1}; + auto output_shape = PartialShape{input_shape.rank(), {0, 1}}; + auto& dim = output_shape[1]; for (auto&& d : input_shape) dim *= d; - set_output_type(0, m_output_type, PartialShape{input_shape.rank(), dim}); + set_output_type(0, m_output_type, output_shape); } set_input_is_relevant_to_shape(0); if (const auto input_constant = ov::util::get_constant_from_source(input_value(0))) { // input_value is available to calculate output shape - - // const_cast of Constant data is needed to avoid obsolete copy of this data into the Tensor. - // It's safe here as evaluate() method doesn't modify input Tensors. - const auto inputs = TensorVector{{input_constant->get_element_type(), - input_constant->get_shape(), - const_cast(input_constant->get_data_ptr())}}; + const auto inputs = TensorVector{input_constant->get_tensor_view()}; auto outputs = TensorVector{{m_output_type, {}}}; if (!evaluate(outputs, inputs)) return; diff --git a/src/core/src/op/parameter.cpp b/src/core/src/op/parameter.cpp index 16cd315f9dff0e..705c11f31196b5 100644 --- a/src/core/src/op/parameter.cpp +++ b/src/core/src/op/parameter.cpp @@ -84,7 +84,7 @@ bool AttributeAdapter::visit_attributes(AttributeVisitor& visit } visitor.on_attribute(index.str(), id); if (!m_ref[i]) { - m_ref[i] = ov::as_type_ptr(visitor.get_registered_node(id)); + m_ref[i] = ov::as_type_ptr(visitor.get_registered_node(std::move(id))); } } return true; diff --git a/src/core/src/op/result.cpp b/src/core/src/op/result.cpp index b500ae13100ad2..3667e5ff22b422 100644 --- a/src/core/src/op/result.cpp +++ b/src/core/src/op/result.cpp @@ -99,7 +99,7 @@ bool AttributeAdapter::visit_attributes(AttributeVisitor& visitor) } visitor.on_attribute(index.str(), id); if (!m_ref[i]) { - m_ref[i] = as_type_ptr(visitor.get_registered_node(id)); + m_ref[i] = as_type_ptr(visitor.get_registered_node(std::move(id))); } } return true; diff --git a/src/core/src/op/space_to_batch.cpp b/src/core/src/op/space_to_batch.cpp index a641586c6879aa..7d7880f937eba2 100644 --- a/src/core/src/op/space_to_batch.cpp +++ b/src/core/src/op/space_to_batch.cpp @@ -113,7 +113,7 @@ bool evaluate(TensorVector& outputs, const TensorVector& inputs) { pads_begin_vec, pads_end_vec, op::PadMode::CONSTANT); - data_shape = padded_shape; + data_shape = std::move(padded_shape); Shape dispersed_shape(block_values_size + 1); std::vector axes_order(block_values_size + 1); diff --git a/src/core/src/op/type_relaxed.cpp b/src/core/src/op/type_relaxed.cpp index 09e4d37cfe0a5f..67a44b410cde57 100644 --- a/src/core/src/op/type_relaxed.cpp +++ b/src/core/src/op/type_relaxed.cpp @@ -56,9 +56,10 @@ void TypeRelaxedBase::restore_input_data_types(Node& node, const element::TypeVe } } -TemporaryReplaceOutputType::TemporaryReplaceOutputType(Output output, element::Type tmp_type) : m_output(output) { +TemporaryReplaceOutputType::TemporaryReplaceOutputType(Output output, element::Type tmp_type) + : m_output(std::move(output)), + 
diff --git a/src/core/src/op/type_relaxed.cpp b/src/core/src/op/type_relaxed.cpp
index 09e4d37cfe0a5f..67a44b410cde57 100644
--- a/src/core/src/op/type_relaxed.cpp
+++ b/src/core/src/op/type_relaxed.cpp
@@ -56,9 +56,10 @@ void TypeRelaxedBase::restore_input_data_types(Node& node, const element::TypeVector&
     }
 }
 
-TemporaryReplaceOutputType::TemporaryReplaceOutputType(Output<Node> output, element::Type tmp_type) : m_output(output) {
+TemporaryReplaceOutputType::TemporaryReplaceOutputType(Output<Node> output, element::Type tmp_type)
+    : m_output(std::move(output)),
+      orig_type(m_output.get_element_type()) {
     // save original element type in order to restore it in the destructor
-    orig_type = m_output.get_element_type();
     ov::descriptor::set_element_type(m_output.get_tensor(), tmp_type);
 }
diff --git a/src/core/src/op/unique.cpp b/src/core/src/op/unique.cpp
index 8088f1fa7098d4..b00f207f820ae6 100644
--- a/src/core/src/op/unique.cpp
+++ b/src/core/src/op/unique.cpp
@@ -163,11 +163,9 @@ void op::v10::Unique::validate_and_infer_types() {
             rev_idx_size = Dimension{dim_at_axis.get_max_length()};
         }
 
-        auto output_shape = input_shape;
-        output_shape[normalized_axis] = output_dim_at_axis;
-        output_shapes[0] = output_shape;
-
-        output_shapes[2] = PartialShape{rev_idx_size};
+        output_shapes[0] = input_shape;
+        output_shapes[0][normalized_axis] = std::move(output_dim_at_axis);
+        output_shapes[2] = PartialShape{std::move(rev_idx_size)};
         }
     } else {
         // no axis => flattened input tensor
diff --git a/src/core/src/op/util/broadcast_base.cpp b/src/core/src/op/util/broadcast_base.cpp
index 1d4f7c1541ae1f..59154e45e2b37a 100644
--- a/src/core/src/op/util/broadcast_base.cpp
+++ b/src/core/src/op/util/broadcast_base.cpp
@@ -197,17 +197,16 @@ void ov::op::util::BroadcastBase::validate_and_infer_types() {
         if (!output_shape_defined && concat->get_output_partial_shape(0).is_static() &&
             concat->get_shape().size() == 1 && concat_inputs.size() == shape_size(concat->get_shape())) {
-            auto output_partial_shape = std::vector<Dimension>{};
+            output_shape.resize(0);
             for (const auto& concat_input : concat_inputs) {
                 auto source_node_ptr = concat_input.get_source_output().get_node_shared_ptr();
                 if (auto source_const_ptr = ov::as_type_ptr<ov::op::v0::Constant>(source_node_ptr)) {
-                    output_partial_shape.emplace_back(source_const_ptr->get_axis_vector_val()[0]);
+                    output_shape.emplace_back(source_const_ptr->get_axis_vector_val()[0]);
                 } else {
-                    output_partial_shape.push_back(Dimension::dynamic());
+                    output_shape.push_back(Dimension::dynamic());
                 }
             }
             output_shape_defined = true;
-            output_shape = PartialShape(output_partial_shape);
         }
     }
diff --git a/src/core/src/op/util/framework_node.cpp b/src/core/src/op/util/framework_node.cpp
index 9a3389a020b8a7..a7a71203c3d1c0 100644
--- a/src/core/src/op/util/framework_node.cpp
+++ b/src/core/src/op/util/framework_node.cpp
@@ -94,7 +94,7 @@ void ov::op::util::FrameworkNode::validate_and_infer_types() {
             pshape = shape_map.at(output_index);
         }
         if (PartialShape::merge_into(pshape, node_result.get_partial_shape())) {
-            shape_map[output_index] = pshape;
+            shape_map[output_index] = std::move(pshape);
         } else {
             shape_map[output_index] = PartialShape::dynamic();
         }
diff --git a/src/core/src/op/util/index_reduction.cpp b/src/core/src/op/util/index_reduction.cpp
index dac49b32c7738b..1c40af33785e73 100644
--- a/src/core/src/op/util/index_reduction.cpp
+++ b/src/core/src/op/util/index_reduction.cpp
@@ -70,7 +70,7 @@ void ov::op::util::IndexReduction::validate_and_infer_types() {
             output_dims[i] = arg_shape[j++];
         }
 
-        output_shape = PartialShape(output_dims);
+        output_shape = PartialShape(std::move(output_dims));
     }
 
     set_output_type(0, m_index_element_type, output_shape);
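Editor's note: the core-op hunks above share one theme, building shapes in place and moving them instead of copying through a temporary. A compact illustration of why the `std::move` matters (generic C++ with illustrative types, not the ov::PartialShape API):

```cpp
#include <cstdint>
#include <utility>
#include <vector>

// Illustration only: a dimension vector standing in for a shape's storage.
using Dims = std::vector<int64_t>;

struct Shape {
    Dims dims;
    Shape() = default;
    explicit Shape(Dims d) : dims(std::move(d)) {}  // takes by value, moves into place
};

int main() {
    Dims output_dims = {1, 3, 224, 224};
    // Before: Shape(output_dims) copies the vector, then the original is discarded.
    // After:  Shape(std::move(output_dims)) steals the buffer; no extra allocation.
    Shape output_shape{std::move(output_dims)};
    (void)output_shape;
}
```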
push_arg_entry_of("inf_neg", 0xFF800000, true); } +/// IS_NAN /// +jit_is_nan_emitter::jit_is_nan_emitter(dnnl::impl::cpu::aarch64::jit_generator* host, + dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, + const std::shared_ptr& node) + : jit_emitter(host, host_isa, node, get_arithmetic_binary_exec_precision(node)) { + auto isNaN = ov::as_type_ptr(node); + if (isNaN == nullptr) { + OV_CPU_JIT_EMITTER_THROW("Can't cast to ov::op::v10::IsNaN"); + } + + prepare_table(); +} + +jit_is_nan_emitter::jit_is_nan_emitter(dnnl::impl::cpu::aarch64::jit_generator* host, + dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, + const ov::element::Type exec_prc) + : jit_emitter(host, host_isa, exec_prc) { + prepare_table(); +} + +size_t jit_is_nan_emitter::get_inputs_count() const { return 1; } + +size_t jit_is_nan_emitter::get_aux_vecs_count() const { return 1; } + +size_t jit_is_nan_emitter::get_aux_gprs_count() const { return 1; } + +std::set> jit_is_nan_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32}}; +} + +void jit_is_nan_emitter::emit_impl(const std::vector& in_vec_idxs, const std::vector& out_vec_idxs) const { + if (host_isa_ == dnnl::impl::cpu::aarch64::asimd) { + emit_isa(in_vec_idxs, out_vec_idxs); + } else { + OV_CPU_JIT_EMITTER_THROW("Can't create jit eltwise kernel"); + } +} + +template +void jit_is_nan_emitter::emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { + OV_CPU_JIT_EMITTER_ASSERT(exec_prc_ == ov::element::f32, "unsupported precision: " + exec_prc_.to_string()); + + using TReg = typename dnnl::impl::cpu::aarch64::cpu_isa_traits::TReg; + + TReg src = TReg(in_vec_idxs[0]); + TReg dst = TReg(out_vec_idxs[0]); + TReg aux = TReg(aux_vec_idxs[0]); + + // According to the IEEE standard, NaN values have the odd property that comparisons involving them are always false. + h->fcmeq(dst.s, src.s, src.s); + h->ld1r(aux.s, table_val2("zero")); + h->fcmeq(dst.s, dst.s, aux.s); + // Sets elements in 'dst' to 1.0 where the comparison was true. + h->ld1r(aux.s, table_val2("one")); + h->and_(dst.b16, dst.b16, aux.b16); +} + +void jit_is_nan_emitter::register_table_entries() { + // Registers constant values that comply with the IEEE 754 standard. 
+ push_arg_entry_of("one", 0x3f800000, true); + push_arg_entry_of("zero", 0x00000000, true); +} + /// MAX /// jit_maximum_emitter::jit_maximum_emitter(dnnl::impl::cpu::aarch64::jit_generator* host, dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, diff --git a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp index ba107907d7b967..8dda7f3e91c7d4 100644 --- a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp +++ b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp @@ -305,6 +305,33 @@ class jit_hswish_emitter : public jit_emitter { void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; }; +class jit_is_nan_emitter : public jit_emitter { +public: + jit_is_nan_emitter(dnnl::impl::cpu::aarch64::jit_generator *host, + dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, + const ov::element::Type exec_prc = ov::element::f32); + + jit_is_nan_emitter(dnnl::impl::cpu::aarch64::jit_generator *host, + dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, + const std::shared_ptr& node); + + size_t get_inputs_count() const override; + + size_t get_aux_vecs_count() const override; + + size_t get_aux_gprs_count() const override; + + static std::set> get_supported_precisions(const std::shared_ptr& node = nullptr); + +private: + void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; + + template + void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; + + void register_table_entries() override; +}; + class jit_maximum_emitter : public jit_emitter { public: jit_maximum_emitter(dnnl::impl::cpu::aarch64::jit_generator *host, diff --git a/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp b/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp index 01cbb57f49a32c..3097824c07ff47 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp @@ -30,6 +30,7 @@ bool JitEltwiseExecutor::isSupported( Algorithm::EltwiseGeluTanh, Algorithm::EltwiseHswish, Algorithm::EltwiseIsInf, + Algorithm::EltwiseIsNaN, Algorithm::EltwiseMaximum, Algorithm::EltwiseMinimum, Algorithm::EltwiseMish, diff --git a/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp b/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp index 1d0368b1a318c5..eee26497593474 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp @@ -650,6 +650,7 @@ std::shared_ptr jit_uni_eltwise_generic::create_eltwise_emitte OV_CASE(Algorithm::EltwiseFloor, ov::intel_cpu::aarch64::jit_floor_emitter), OV_CASE(Algorithm::EltwiseHswish, ov::intel_cpu::aarch64::jit_hswish_emitter), OV_CASE(Algorithm::EltwiseIsInf, ov::intel_cpu::aarch64::jit_is_inf_emitter), + OV_CASE(Algorithm::EltwiseIsNaN, ov::intel_cpu::aarch64::jit_is_nan_emitter), OV_CASE(Algorithm::EltwiseMaximum, ov::intel_cpu::aarch64::jit_maximum_emitter), OV_CASE(Algorithm::EltwiseMinimum, ov::intel_cpu::aarch64::jit_minimum_emitter), OV_CASE(Algorithm::EltwiseMish, ov::intel_cpu::aarch64::jit_mish_emitter), @@ -823,6 +824,7 @@ std::set> eltwise_precision_helper::get_supported_pre OV_CASE(Algorithm::EltwiseGeluTanh, jit_gelu_tanh_emitter), OV_CASE(Algorithm::EltwiseHswish, jit_hswish_emitter), 
@@ -823,6 +824,7 @@ std::set<std::vector<element::Type>> eltwise_precision_helper::get_supported_precisions
     OV_CASE(Algorithm::EltwiseGeluTanh, jit_gelu_tanh_emitter),
     OV_CASE(Algorithm::EltwiseHswish, jit_hswish_emitter),
     OV_CASE(Algorithm::EltwiseIsInf, jit_is_inf_emitter),
+    OV_CASE(Algorithm::EltwiseIsNaN, jit_is_nan_emitter),
     OV_CASE(Algorithm::EltwiseMaximum, jit_maximum_emitter),
     OV_CASE(Algorithm::EltwiseMinimum, jit_minimum_emitter),
     OV_CASE(Algorithm::EltwiseMish, jit_mish_emitter),
diff --git a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/activation.cpp b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/activation.cpp
index 3579a33c23414a..876ee3e5e22d72 100644
--- a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/activation.cpp
+++ b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/classes/activation.cpp
@@ -175,6 +175,7 @@ std::string ActivationLayerCPUTest::getPrimitiveType(const utils::ActivationTypes
            (activation_type == utils::ActivationTypes::HSwish) ||
            (activation_type == utils::ActivationTypes::IsInf) ||
            (activation_type == utils::ActivationTypes::HardSigmoid) ||
+           (activation_type == utils::ActivationTypes::IsNaN) ||
            (activation_type == utils::ActivationTypes::Mish) ||
            (activation_type == utils::ActivationTypes::GeluErf) ||
            (activation_type == utils::ActivationTypes::GeluTanh) ||
@@ -190,7 +191,8 @@ std::string ActivationLayerCPUTest::getPrimitiveType(const utils::ActivationTypes
         return "";
     }
 #endif
-    if (activation_type == utils::ActivationTypes::Floor) {
+    if ((activation_type == utils::ActivationTypes::Floor) ||
+        (activation_type == utils::ActivationTypes::IsNaN)) {
         return "ref";
     }
     return "acl";
@@ -227,6 +229,7 @@ const std::map<utils::ActivationTypes, std::vector<std::vector<float>>>& activationTypes
         {GeluTanh, {{}}},
         {SoftSign, {{}}},
         {SoftPlus, {{}}},
+        {IsNaN, {{}}},
     };
 
     return activationTypes;
diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/activation.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/activation.cpp
index 83b648779bbb49..5b6f0c276799b1 100644
--- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/activation.cpp
+++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/activation.cpp
@@ -55,7 +55,8 @@ const std::map<ActivationTypes, std::vector<std::vector<float>>> activationTypes = {
     {ActivationTypes::GeluErf, {}},
     {ActivationTypes::GeluTanh, {}},
     {ActivationTypes::Swish, {{0.4f}}},
-    {ActivationTypes::IsInf, {}}
+    {ActivationTypes::IsInf, {}},
+    {ActivationTypes::IsNaN, {{}}},
 };
 
 // List of operations that should be tested also with integer precision
diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/activation.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/activation.hpp
index 1390e7a6a00269..4378db331abf19 100644
--- a/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/activation.hpp
+++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/single_op/activation.hpp
@@ -68,6 +68,7 @@ static std::map<ActivationTypes, std::string> activationNames = {
     {ActivationTypes::GeluTanh, "GeluTanh"},
     {ActivationTypes::SoftSign, "SoftSign"},
     {ActivationTypes::IsInf, "IsInf"},
+    {ActivationTypes::IsNaN, "IsNaN"},
 };
 
 typedef std::tuple<
diff --git a/src/tests/test_utils/common_test_utils/include/common_test_utils/test_enums.hpp b/src/tests/test_utils/common_test_utils/include/common_test_utils/test_enums.hpp
index e0bf7bfec04349..98e4416f42a53c 100644
--- a/src/tests/test_utils/common_test_utils/include/common_test_utils/test_enums.hpp
+++ b/src/tests/test_utils/common_test_utils/include/common_test_utils/test_enums.hpp
@@ -122,7 +122,8 @@ enum ActivationTypes {
     GeluErf,
     GeluTanh,
     SoftSign,
-    IsInf
+    IsInf,
+    IsNaN,
 };
 
 enum MinMaxOpType {
diff --git a/src/tests/test_utils/common_test_utils/src/node_builders/activation.cpp b/src/tests/test_utils/common_test_utils/src/node_builders/activation.cpp
index 18bf6cd100ec67..b28949d867e4f3 100644
--- a/src/tests/test_utils/common_test_utils/src/node_builders/activation.cpp
+++ b/src/tests/test_utils/common_test_utils/src/node_builders/activation.cpp
@@ -25,6 +25,7 @@
 #include "openvino/op/hsigmoid.hpp"
 #include "openvino/op/hswish.hpp"
 #include "openvino/op/is_inf.hpp"
+#include "openvino/op/is_nan.hpp"
 #include "openvino/op/log.hpp"
 #include "openvino/op/mish.hpp"
 #include "openvino/op/negative.hpp"
@@ -147,6 +148,8 @@ std::shared_ptr<ov::Node> make_activation(const ov::Output<ov::Node>& in,
     case ov::test::utils::ActivationTypes::IsInf:
         return std::make_shared<ov::op::v10::IsInf>(in);
+    case ov::test::utils::ActivationTypes::IsNaN:
+        return std::make_shared<ov::op::v10::IsNaN>(in);
     default:
         OPENVINO_THROW("Can't create layer for this activation type");
     }
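Editor's note: the test-utility changes above ultimately boil down to constructing opset10's `IsNaN` node. For readers who want a standalone repro, a minimal usage sketch against the public ov API (function name is illustrative):

```cpp
#include <memory>

#include "openvino/core/model.hpp"
#include "openvino/op/is_nan.hpp"
#include "openvino/op/parameter.hpp"
#include "openvino/op/result.hpp"

// Builds a one-op model: f32 input -> IsNaN -> boolean output.
// This mirrors what make_activation() now produces for ActivationTypes::IsNaN.
std::shared_ptr<ov::Model> build_is_nan_model() {
    auto input = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::PartialShape{1, 8});
    auto is_nan = std::make_shared<ov::op::v10::IsNaN>(input);  // output type is element::boolean
    auto result = std::make_shared<ov::op::v0::Result>(is_nan);
    return std::make_shared<ov::Model>(ov::ResultVector{result}, ov::ParameterVector{input});
}
```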