diff --git a/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api_impl.cpp b/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api_impl.cpp index 9a7d652866643b..ef357921cb362d 100644 --- a/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api_impl.cpp +++ b/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api_impl.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -60,6 +61,7 @@ void InferenceEnginePython::GenerateMappingFile(InferenceEnginePython::IENetwork void InferenceEnginePython::CompressModelTransformation(InferenceEnginePython::IENetwork network) { ngraph::pass::Manager manager; + manager.register_pass(); manager.register_pass(); manager.register_pass(); manager.run_passes(network.actual->getFunction()); diff --git a/inference-engine/src/transformations/include/transformations/common_optimizations/division_to_zero_fp16_resolver.hpp b/inference-engine/src/transformations/include/transformations/common_optimizations/division_to_zero_fp16_resolver.hpp index da4311c10652a8..d87ab3af887c5d 100644 --- a/inference-engine/src/transformations/include/transformations/common_optimizations/division_to_zero_fp16_resolver.hpp +++ b/inference-engine/src/transformations/include/transformations/common_optimizations/division_to_zero_fp16_resolver.hpp @@ -11,20 +11,23 @@ #include #include "ngraph/pattern/matcher.hpp" -namespace ngraph { +namespace ov { namespace pass { class TRANSFORMATIONS_API DivisionToZeroFP16Resolver; } // namespace pass -} // namespace ngraph +} // namespace ov /** * @ingroup ie_transformation_common_api - * @brief : - + * @brief: clamps eps into fp16 minimal normalized value in input_1/Maximum(input_2, eps) and input_1/Add(input_2, eps) patterns + * + * eps must be always nonzero to prevent from NaNs in such expressions if input_1 and input_2 simultaneously happened to be zero. + * We should keep in such patterns eps >= fp16 minimal normalized value so that + * CompressFloatConstants should not cast them into zero during compression into f16. */ -class ngraph::pass::DivisionToZeroFP16Resolver: public ngraph::pass::MatcherPass { +class ov::pass::DivisionToZeroFP16Resolver: public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; DivisionToZeroFP16Resolver(); diff --git a/inference-engine/src/transformations/src/transformations/common_optimizations/division_to_zero_fp16_resolver.cpp b/inference-engine/src/transformations/src/transformations/common_optimizations/division_to_zero_fp16_resolver.cpp index 9a6534066223ca..40431c4dbba73e 100644 --- a/inference-engine/src/transformations/src/transformations/common_optimizations/division_to_zero_fp16_resolver.cpp +++ b/inference-engine/src/transformations/src/transformations/common_optimizations/division_to_zero_fp16_resolver.cpp @@ -9,49 +9,47 @@ #include #include -#include -#include -#include -#include +#include +#include "ngraph/rt_info.hpp" +#include +#include -NGRAPH_RTTI_DEFINITION(ngraph::pass::DivisionToZeroFP16Resolver, "DivisionToZeroFP16Resolver", 0); +NGRAPH_RTTI_DEFINITION(ov::pass::DivisionToZeroFP16Resolver, "DivisionToZeroFP16Resolver", 0); -constexpr float normalized_fp16_min = 6.103515625e-05f; // normalized minimum of fp16 +constexpr float normalized_fp16_min = 6.103515625e-05f; // fp16 minimal normalized value -ngraph::pass::DivisionToZeroFP16Resolver::DivisionToZeroFP16Resolver() { - MATCHER_SCOPE(DivisionToZeroFP16Resolver); - auto input_1 = ngraph::pattern::any_input(); - auto input_2 = ngraph::pattern::any_input(); +using namespace ov; +ov::pass::DivisionToZeroFP16Resolver::DivisionToZeroFP16Resolver() { + MATCHER_SCOPE(DivisionToZeroFP16Resolver); + auto input_1 = pattern::any_input(); + auto input_2 = pattern::any_input(); - auto eps_const_pattern = ngraph::pattern::wrap_type(); - auto max = std::make_shared(input_2, eps_const_pattern); - auto add = std::make_shared(input_2, eps_const_pattern); + auto eps_const_pattern = pattern::wrap_type(); + auto max = std::make_shared(input_2, eps_const_pattern); + auto add = std::make_shared(input_2, eps_const_pattern); auto max_or_add = std::make_shared(OutputVector{max, add}); - auto divide = std::make_shared(input_1, max_or_add); + auto divide = std::make_shared(input_1, max_or_add); - ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { + matcher_pass_callback callback = [=](pattern::Matcher& m) { const auto& pattern_to_output = m.get_pattern_value_map(); - const auto eps_const = std::dynamic_pointer_cast(pattern_to_output.at(eps_const_pattern).get_node_shared_ptr()); - - if (!eps_const) { + const auto eps_const = std::dynamic_pointer_cast(pattern_to_output.at(eps_const_pattern).get_node_shared_ptr()); + if (!eps_const) return false; - } - for (float val : eps_const->get_vector()) { - if (val >= normalized_fp16_min) { + + for (float val : eps_const->get_vector()) + if (val >= normalized_fp16_min) return false; - } - } auto new_constant = std::make_shared(eps_const->get_element_type(), eps_const->get_shape(), normalized_fp16_min); - ngraph::copy_runtime_info(eps_const, new_constant); - ngraph::replace_node(eps_const, new_constant); + copy_runtime_info(eps_const, new_constant); + replace_node(eps_const, new_constant); return true; }; - auto m = std::make_shared(divide, matcher_name); + auto m = std::make_shared(divide, matcher_name); register_matcher(m, callback); } diff --git a/inference-engine/tests/functional/inference_engine/transformations/division_to_zero_fp16_resolver_test.cpp b/inference-engine/tests/functional/inference_engine/transformations/division_to_zero_fp16_resolver_test.cpp index fde15ce357aeea..b57010889cea98 100644 --- a/inference-engine/tests/functional/inference_engine/transformations/division_to_zero_fp16_resolver_test.cpp +++ b/inference-engine/tests/functional/inference_engine/transformations/division_to_zero_fp16_resolver_test.cpp @@ -8,47 +8,103 @@ #include #include -#include -#include +#include +#include #include #include -#include #include "common_test_utils/ngraph_test_utils.hpp" using namespace testing; -constexpr float normalized_fp16_min = 6.103515625e-05f; // normalized minimum of fp16 +using namespace ov; +constexpr float normalized_fp16_min = 6.103515625e-05f; // fp16 minimal normalized value + + +TEST_F(TransformationTestsF, DivisionToZeroMinimalPattern) { + const float eps_value = 1.e-12; + { + auto input_1 = std::make_shared(element::f32, PartialShape::dynamic(3)); + auto input_2 = std::make_shared(element::f32, PartialShape::dynamic(3)); + auto eps_const = opset4::Constant::create(element::f32, Shape{1}, {eps_value}); + auto add = std::make_shared(input_2, eps_const); + auto divide = std::make_shared(input_1, add); + + function = std::make_shared(NodeVector{divide}, ParameterVector{input_1, input_2}); + + manager.register_pass(); + } + + { + auto input_1 = std::make_shared(element::f32, PartialShape::dynamic(3)); + auto input_2 = std::make_shared(element::f32, PartialShape::dynamic(3)); + auto eps_const = opset4::Constant::create(element::f32, Shape{1}, {normalized_fp16_min}); + auto add = std::make_shared(input_2, eps_const); + auto divide = std::make_shared(input_1, add); + + function_ref = std::make_shared(NodeVector{divide}, ParameterVector{input_1, input_2}); + + manager.register_pass(); + } +} + +TEST_F(TransformationTestsF, DivisionToZeroMinimalPatternUnchanged) { + // if eps_value is greater than normalized_fp16_min then leave graph unchanged + const float eps_value = 0.000099f; + { + auto input_1 = std::make_shared(element::f32, PartialShape::dynamic(3)); + auto input_2 = std::make_shared(element::f32, PartialShape::dynamic(3)); + auto eps_const = opset4::Constant::create(element::f32, Shape{1}, {eps_value}); + auto add = std::make_shared(input_2, eps_const); + auto divide = std::make_shared(input_1, add); + + function = std::make_shared(NodeVector{divide}, ParameterVector{input_1, input_2}); + + manager.register_pass(); + } + + { + auto input_1 = std::make_shared(element::f32, PartialShape::dynamic(3)); + auto input_2 = std::make_shared(element::f32, PartialShape::dynamic(3)); + auto eps_const = opset4::Constant::create(element::f32, Shape{1}, {eps_value}); + auto add = std::make_shared(input_2, eps_const); + auto divide = std::make_shared(input_1, add); + + function_ref = std::make_shared(NodeVector{divide}, ParameterVector{input_1, input_2}); + + manager.register_pass(); + } +} TEST_F(TransformationTestsF, DivisionToZeroWithMax) { const float eps_value = 1.e-12; { - auto input = std::make_shared(ngraph::element::f32, ngraph::PartialShape::dynamic(3)); - auto exp = ngraph::opset4::Constant::create(ngraph::element::f32, ngraph::Shape{}, {2.f}); - auto pow = std::make_shared(input, exp); - auto axes_const = ngraph::opset4::Constant::create(ngraph::element::i64, ngraph::Shape{2}, {0, 1}); - auto reduce_sum = std::make_shared(pow, axes_const); - auto eps_const = ngraph::opset4::Constant::create(ngraph::element::f32, ngraph::Shape{}, {eps_value}); - auto max = std::make_shared(reduce_sum, eps_const); - auto sqrt = std::make_shared(max); - auto divide = std::make_shared(input, sqrt); - - function = std::make_shared(ngraph::NodeVector{divide}, ngraph::ParameterVector{input}); - - manager.register_pass(); + auto input = std::make_shared(element::f32, PartialShape::dynamic(3)); + auto exp = opset4::Constant::create(element::f32, Shape{}, {2.f}); + auto pow = std::make_shared(input, exp); + auto axes_const = opset4::Constant::create(element::i64, Shape{2}, {0, 1}); + auto reduce_sum = std::make_shared(pow, axes_const); + auto eps_const = opset4::Constant::create(element::f32, Shape{}, {eps_value}); + auto max = std::make_shared(reduce_sum, eps_const); + auto sqrt = std::make_shared(max); + auto divide = std::make_shared(input, sqrt); + + function = std::make_shared(NodeVector{divide}, ParameterVector{input}); + + manager.register_pass(); } { - auto input = std::make_shared(ngraph::element::f32, ngraph::PartialShape::dynamic(3)); - auto exp = ngraph::opset4::Constant::create(ngraph::element::f32, ngraph::Shape{}, {2.f}); - auto pow = std::make_shared(input, exp); - auto axes_const = ngraph::opset4::Constant::create(ngraph::element::i64, ngraph::Shape{2}, {0, 1}); - auto reduce_sum = std::make_shared(pow, axes_const); - auto eps_const = ngraph::opset4::Constant::create(ngraph::element::f32, ngraph::Shape{}, {normalized_fp16_min}); - auto max = std::make_shared(reduce_sum, eps_const); - auto sqrt = std::make_shared(max); - auto divide = std::make_shared(input, sqrt); - - function_ref = std::make_shared(ngraph::NodeVector{divide}, ngraph::ParameterVector{input}); + auto input = std::make_shared(element::f32, PartialShape::dynamic(3)); + auto exp = opset4::Constant::create(element::f32, Shape{}, {2.f}); + auto pow = std::make_shared(input, exp); + auto axes_const = opset4::Constant::create(element::i64, Shape{2}, {0, 1}); + auto reduce_sum = std::make_shared(pow, axes_const); + auto eps_const = opset4::Constant::create(element::f32, Shape{}, {normalized_fp16_min}); + auto max = std::make_shared(reduce_sum, eps_const); + auto sqrt = std::make_shared(max); + auto divide = std::make_shared(input, sqrt); + + function_ref = std::make_shared(NodeVector{divide}, ParameterVector{input}); } } @@ -56,34 +112,34 @@ TEST_F(TransformationTestsF, DivisionToZeroWithMax) { TEST_F(TransformationTestsF, DivisionToZeroWithAdd) { const float eps_value = 0.000099f; { - auto input = std::make_shared(ngraph::element::f32, ngraph::PartialShape::dynamic(3)); - auto exp = ngraph::opset4::Constant::create(ngraph::element::f32, ngraph::Shape{}, {2.f}); - auto pow = std::make_shared(input, exp); - auto axes_const = ngraph::opset4::Constant::create(ngraph::element::i64, ngraph::Shape{2}, {0, 1}); - auto reduce_sum = std::make_shared(pow, axes_const); - auto eps_const = ngraph::opset4::Constant::create(ngraph::element::f32, ngraph::Shape{1}, {eps_value}); - auto add = std::make_shared(reduce_sum, eps_const); - auto sqrt = std::make_shared(add); - auto divide = std::make_shared(input, sqrt); - - function = std::make_shared(ngraph::NodeVector{divide}, ngraph::ParameterVector{input}); - - manager.register_pass(); + auto input = std::make_shared(element::f32, PartialShape::dynamic(3)); + auto exp = opset4::Constant::create(element::f32, Shape{}, {2.f}); + auto pow = std::make_shared(input, exp); + auto axes_const = opset4::Constant::create(element::i64, Shape{2}, {0, 1}); + auto reduce_sum = std::make_shared(pow, axes_const); + auto eps_const = opset4::Constant::create(element::f32, Shape{1}, {eps_value}); + auto add = std::make_shared(reduce_sum, eps_const); + auto sqrt = std::make_shared(add); + auto divide = std::make_shared(input, sqrt); + + function = std::make_shared(NodeVector{divide}, ParameterVector{input}); + + manager.register_pass(); } { - auto input = std::make_shared(ngraph::element::f32, ngraph::PartialShape::dynamic(3)); - auto exp = ngraph::opset4::Constant::create(ngraph::element::f32, ngraph::Shape{}, {2.f}); - auto pow = std::make_shared(input, exp); - auto axes_const = ngraph::opset4::Constant::create(ngraph::element::i64, ngraph::Shape{2}, {0, 1}); - auto reduce_sum = std::make_shared(pow, axes_const); - auto eps_const = ngraph::opset4::Constant::create(ngraph::element::f32, ngraph::Shape{1}, {normalized_fp16_min}); - auto add = std::make_shared(reduce_sum, eps_const); - auto sqrt = std::make_shared(add); - auto divide = std::make_shared(input, sqrt); - - function_ref = std::make_shared(ngraph::NodeVector{divide}, ngraph::ParameterVector{input}); - - manager.register_pass(); + auto input = std::make_shared(element::f32, PartialShape::dynamic(3)); + auto exp = opset4::Constant::create(element::f32, Shape{}, {2.f}); + auto pow = std::make_shared(input, exp); + auto axes_const = opset4::Constant::create(element::i64, Shape{2}, {0, 1}); + auto reduce_sum = std::make_shared(pow, axes_const); + auto eps_const = opset4::Constant::create(element::f32, Shape{1}, {normalized_fp16_min}); + auto add = std::make_shared(reduce_sum, eps_const); + auto sqrt = std::make_shared(add); + auto divide = std::make_shared(input, sqrt); + + function_ref = std::make_shared(NodeVector{divide}, ParameterVector{input}); + + manager.register_pass(); } }