-
Notifications
You must be signed in to change notification settings - Fork 2.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[IE][nGraph] resolve division by zero for FP16 #8676
Merged
rkazants
merged 10 commits into
openvinotoolkit:master
from
pavel-esir:resolve_division_to_zero
Dec 7, 2021
Merged
Changes from all commits
Commits
Show all changes
10 commits
Select commit
Hold shift + click to select a range
c6146c6
initial solution
pavel-esir c4aa420
changed namespaces from ngraph -> ov; completed unit-tests
pavel-esir 5ce9610
added Power with negative exponent into the pattern
pavel-esir df3740e
Merge remote-tracking branch 'upstream/master' into resolve_division_…
pavel-esir 75aac2a
division to -> division by; aligned with OV new folder structure; som…
pavel-esir 784c440
Merge remote-tracking branch 'upstream/master' into resolve_division_…
pavel-esir 3b4e24d
Merge remote-tracking branch 'upstream/master' into resolve_division_…
pavel-esir d6cb449
corrected pattern
pavel-esir d445228
Merge remote-tracking branch 'upstream/master' into resolve_division_…
pavel-esir 577e51b
changed get_pattern_value_map -> get_pattern_map
pavel-esir File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
210 changes: 210 additions & 0 deletions
210
...tests/functional/inference_engine/transformations/division_by_zero_fp16_resolver_test.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,210 @@ | ||
// Copyright (C) 2018-2021 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#include <gtest/gtest.h> | ||
|
||
#include <string> | ||
#include <memory> | ||
|
||
#include <ngraph/function.hpp> | ||
#include <openvino/opsets/opset4.hpp> | ||
#include <openvino/pass/manager.hpp> | ||
#include <transformations/common_optimizations/division_by_zero_fp16_resolver.hpp> | ||
#include <transformations/init_node_info.hpp> | ||
|
||
#include "common_test_utils/ngraph_test_utils.hpp" | ||
|
||
using namespace testing; | ||
using namespace ov; | ||
constexpr float normalized_fp16_min = 6.103515625e-05f; // fp16 minimal normalized value | ||
|
||
|
||
// input_1 / Add(input_2, eps): the transformed graph is compared against the same graph
// built with eps equal to the fp16 minimal normalized value.
TEST_F(TransformationTestsF, DivisionByZeroMinimalPattern) {
    const float eps_value = 1.e-12;

    // Builds numerator / (denominator + eps) over two f32 parameters of shape PartialShape::dynamic(3).
    const auto build_graph = [](float eps) {
        auto numerator = std::make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(3));
        auto denominator = std::make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(3));
        auto eps_node = opset4::Constant::create(element::f32, Shape{1}, {eps});
        auto guarded = std::make_shared<opset4::Add>(denominator, eps_node);
        auto quotient = std::make_shared<opset4::Divide>(numerator, guarded);
        return std::make_shared<Function>(NodeVector{quotient}, ParameterVector{numerator, denominator});
    };

    function = build_graph(eps_value);
    manager.register_pass<pass::DivisionByZeroFP16Resolver>();

    function_ref = build_graph(normalized_fp16_min);

    comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
    comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES);
}
|
||
// input_1 * Pow(Add(input_2, eps), -1.77): the transformed graph is compared against the same
// graph built with eps equal to the fp16 minimal normalized value.
TEST_F(TransformationTestsF, PowWithNegativeExponent) {
    const float eps_value = 1.e-12;

    // Builds lhs * (base + eps)^(-1.77) over two f32 parameters of shape PartialShape::dynamic(3).
    const auto build_graph = [](float eps) {
        auto lhs = std::make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(3));
        auto base = std::make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(3));
        auto eps_node = opset4::Constant::create(element::f32, Shape{1}, {eps});
        auto guarded = std::make_shared<opset4::Add>(base, eps_node);
        auto exponent = opset4::Constant::create(element::f32, Shape{1}, {-1.77});
        auto powered = std::make_shared<opset4::Power>(guarded, exponent);
        auto product = std::make_shared<opset4::Multiply>(lhs, powered);
        return std::make_shared<Function>(NodeVector{product}, ParameterVector{lhs, base});
    };

    function = build_graph(eps_value);
    manager.register_pass<pass::DivisionByZeroFP16Resolver>();

    function_ref = build_graph(normalized_fp16_min);

    comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
    comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES);
}
|
||
// input_1 * Pow(Add(input_2, eps), 1.77): with a positive exponent the graph should be left
// unchanged, so the reference is built with the very same eps.
TEST_F(TransformationTestsF, PowWithPositiveExponent) {
    const float eps_value = 1.e-12;

    // Builds lhs * (base + eps_value)^(1.77) over two f32 parameters of shape PartialShape::dynamic(3).
    const auto build_graph = [eps_value]() {
        auto lhs = std::make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(3));
        auto base = std::make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(3));
        auto eps_node = opset4::Constant::create(element::f32, Shape{1}, {eps_value});
        auto guarded = std::make_shared<opset4::Add>(base, eps_node);
        auto exponent = opset4::Constant::create(element::f32, Shape{1}, {1.77});
        auto powered = std::make_shared<opset4::Power>(guarded, exponent);
        auto product = std::make_shared<opset4::Multiply>(lhs, powered);
        return std::make_shared<Function>(NodeVector{product}, ParameterVector{lhs, base});
    };

    function = build_graph();
    manager.register_pass<pass::DivisionByZeroFP16Resolver>();

    // A second, independent copy of the same graph serves as the reference.
    function_ref = build_graph();

    comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
    comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES);
}
|
||
// input_1 / Add(input_2, eps) with eps greater than normalized_fp16_min: the graph should be
// left unchanged, so the reference is built with the very same eps.
TEST_F(TransformationTestsF, DivisionByZeroMinimalPatternUnchanged) {
    const float eps_value = 0.0001f;

    // Builds numerator / (denominator + eps_value) over two f32 parameters of shape PartialShape::dynamic(3).
    const auto build_graph = [eps_value]() {
        auto numerator = std::make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(3));
        auto denominator = std::make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(3));
        auto eps_node = opset4::Constant::create(element::f32, Shape{1}, {eps_value});
        auto guarded = std::make_shared<opset4::Add>(denominator, eps_node);
        auto quotient = std::make_shared<opset4::Divide>(numerator, guarded);
        return std::make_shared<Function>(NodeVector{quotient}, ParameterVector{numerator, denominator});
    };

    function = build_graph();
    manager.register_pass<pass::DivisionByZeroFP16Resolver>();

    // A second, independent copy of the same graph serves as the reference.
    function_ref = build_graph();

    comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
    comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES);
}
|
||
// input / Sqrt(Maximum(ReduceSum(input^2), eps)): the transformed graph is compared against the
// same graph built with eps equal to the fp16 minimal normalized value.
TEST_F(TransformationTestsF, DivisionByZeroInL2NormWithSqrtAndWithMax) {
    const float eps_value = 1.e-12;

    // Builds data / sqrt(max(reduce_sum(data^2, axes {0, 1}), eps)) over one f32 parameter.
    const auto build_graph = [](float eps) {
        auto data = std::make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(3));
        auto two = opset4::Constant::create(element::f32, Shape{}, {2.f});
        auto squared = std::make_shared<opset4::Power>(data, two);
        auto reduce_axes = opset4::Constant::create(element::i64, Shape{2}, {0, 1});
        auto sum_of_squares = std::make_shared<opset4::ReduceSum>(squared, reduce_axes);
        auto eps_node = opset4::Constant::create(element::f32, Shape{}, {eps});
        auto clamped = std::make_shared<opset4::Maximum>(sum_of_squares, eps_node);
        auto norm = std::make_shared<opset4::Sqrt>(clamped);
        auto normalized = std::make_shared<opset4::Divide>(data, norm);
        return std::make_shared<Function>(NodeVector{normalized}, ParameterVector{data});
    };

    function = build_graph(eps_value);
    manager.register_pass<pass::DivisionByZeroFP16Resolver>();

    function_ref = build_graph(normalized_fp16_min);

    comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
    comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES);
}
|
||
|
||
// input / Sqrt(Add(ReduceSum(input^2), eps)): the transformed graph is compared against the
// same graph built with eps equal to the fp16 minimal normalized value.
TEST_F(TransformationTestsF, DivisionByZeroInL2NormWithSqrtAndWithAdd) {
    const float eps_value = 1.e-12;

    // Builds data / sqrt(reduce_sum(data^2, axes {0, 1}) + eps) over one f32 parameter.
    const auto build_graph = [](float eps) {
        auto data = std::make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(3));
        auto two = opset4::Constant::create(element::f32, Shape{}, {2.f});
        auto squared = std::make_shared<opset4::Power>(data, two);
        auto reduce_axes = opset4::Constant::create(element::i64, Shape{2}, {0, 1});
        auto sum_of_squares = std::make_shared<opset4::ReduceSum>(squared, reduce_axes);
        auto eps_node = opset4::Constant::create(element::f32, Shape{1}, {eps});
        auto guarded = std::make_shared<opset4::Add>(sum_of_squares, eps_node);
        auto norm = std::make_shared<opset4::Sqrt>(guarded);
        auto normalized = std::make_shared<opset4::Divide>(data, norm);
        return std::make_shared<Function>(NodeVector{normalized}, ParameterVector{data});
    };

    function = build_graph(eps_value);
    manager.register_pass<pass::DivisionByZeroFP16Resolver>();

    function_ref = build_graph(normalized_fp16_min);

    comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
    comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES);
}
35 changes: 35 additions & 0 deletions
35
...ormations/include/transformations/common_optimizations/division_by_zero_fp16_resolver.hpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
// Copyright (C) 2018-2021 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#pragma once | ||
|
||
#include <utility> | ||
#include <memory> | ||
|
||
#include <transformations_visibility.hpp> | ||
#include <ngraph/pass/graph_rewrite.hpp> | ||
#include "ngraph/pattern/matcher.hpp" | ||
|
||
namespace ov { | ||
namespace pass { | ||
|
||
class TRANSFORMATIONS_API DivisionByZeroFP16Resolver; | ||
|
||
} // namespace pass | ||
} // namespace ov | ||
|
||
/** | ||
* @ingroup ie_transformation_common_api | ||
* @brief: clamps eps into fp16 minimal normalized value in input_1/Maximum(input_2, eps); input_1/Add(input_2, eps); | ||
* and input_1*Pow(Maximum[Add](input_2, eps), -z) patterns to prevent division by zero. | ||
* | ||
* eps must be always nonzero to prevent from NaNs in such expressions if input_1 and input_2 simultaneously happened to be zero. | ||
* We should keep in such patterns eps >= fp16 minimal normalized value so that | ||
* CompressFloatConstants should not cast them into zero during compression into f16. | ||
*/ | ||
class ov::pass::DivisionByZeroFP16Resolver: public ngraph::pass::MatcherPass { | ||
public: | ||
NGRAPH_RTTI_DECLARATION; | ||
DivisionByZeroFP16Resolver(); | ||
}; |
81 changes: 81 additions & 0 deletions
81
...ansformations/src/transformations/common_optimizations/division_by_zero_fp16_resolver.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
// Copyright (C) 2018-2021 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#include "itt.hpp" | ||
#include "transformations/common_optimizations/division_by_zero_fp16_resolver.hpp" | ||
#include "transformations/utils/utils.hpp" | ||
|
||
#include <memory> | ||
#include <vector> | ||
|
||
#include <openvino/opsets/opset8.hpp> | ||
#include "ngraph/rt_info.hpp" | ||
#include <openvino/pass/pattern/op/wrap_type.hpp> | ||
#include <openvino/pass/pattern/op/or.hpp> | ||
|
||
NGRAPH_RTTI_DEFINITION(ov::pass::DivisionByZeroFP16Resolver, "DivisionByZeroFP16Resolver", 0); | ||
|
||
constexpr float normalized_fp16_min = 6.103515625e-05f; // fp16 minimal normalized value | ||
|
||
using namespace ov; | ||
|
||
// Registers a matcher that looks for an eps constant guarding a division (or a multiplication by
// a negative power) and, when every eps value lies in [0, normalized_fp16_min), replaces the
// constant with one of the same element type and shape filled with normalized_fp16_min.
ov::pass::DivisionByZeroFP16Resolver::DivisionByZeroFP16Resolver() {
    MATCHER_SCOPE(DivisionByZeroFP16Resolver);

    // Patterns in which eps is used to prevent division by zero:
    //   input_1/Maximum(input_2, eps)
    //   input_1/Add(input_2, eps)
    //   input_1/Sqrt(Maximum(input_2, eps))
    //   input_1/Sqrt(Add(input_2, eps))
    //   input_1*Pow(Maximum(input_2, eps), -z)
    //   input_1*Pow(Add(input_2, eps), -z)
    auto input_1 = pattern::any_input();
    auto input_2 = pattern::any_input();

    auto eps_const_pattern = pattern::wrap_type<opset8::Constant>();
    auto max = std::make_shared<opset8::Maximum>(input_2, eps_const_pattern);
    auto add = std::make_shared<opset8::Add>(input_2, eps_const_pattern);
    auto max_or_add = std::make_shared<pattern::op::Or>(OutputVector{max, add});

    auto sqrt = std::make_shared<opset8::Sqrt>(max_or_add);
    auto sqrt_or_max_add = std::make_shared<pattern::op::Or>(OutputVector{max_or_add, sqrt});
    // the denominator is used either directly or after Sqrt
    // (e.g. in L2Norm it is divided after Sqrt, in MVN it is divided directly)
    auto divide = std::make_shared<opset8::Divide>(input_1, sqrt_or_max_add);

    auto pow_exp = pattern::wrap_type<opset8::Constant>();
    auto pow_pattern = std::make_shared<opset8::Power>(max_or_add, pow_exp);
    auto mul_pattern = std::make_shared<opset8::Multiply>(input_1, pow_pattern);
    auto div_or_mul_to_negative_pow = std::make_shared<pattern::op::Or>(OutputVector{divide, mul_pattern});

    matcher_pass_callback callback = [=](pattern::Matcher& m) {
        const auto& pattern_to_output = m.get_pattern_map();

        const auto mul = std::dynamic_pointer_cast<opset8::Multiply>(m.get_match_root());
        if (mul) {
            // pattern input_1*Pow(Maximum(input_2, eps), -z) or input_1*Pow(Add(input_2, eps), -z) is matched
            const auto pow_const = std::dynamic_pointer_cast<opset8::Constant>(pattern_to_output.at(pow_exp));
            // Reading the exponent through get_vector<float>() is only meaningful for an f32 constant;
            // bail out instead of dereferencing a null pointer or reading values of another type.
            if (!pow_const || pow_const->get_element_type() != ov::element::f32)
                return false;

            for (float val : pow_const->get_vector<float>())
                if (val >= 0)  // continue only if every exponent is negative (z > 0 in Pow(..., -z))
                    return false;
        }

        const auto eps_const = std::dynamic_pointer_cast<opset8::Constant>(pattern_to_output.at(eps_const_pattern));
        if (!eps_const || eps_const->get_element_type() != ov::element::f32)
            return false;

        // Only a non-negative constant smaller than normalized_fp16_min is an eps that needs fixing.
        // A negative constant (e.g. the -1 in x / (y + (-1))) is not an eps and must not be
        // overwritten with a small positive value.
        for (float val : eps_const->get_vector<float>())
            if (val < 0 || val >= normalized_fp16_min)
                return false;

        // Same element type and shape as the original eps, every element set to normalized_fp16_min.
        auto new_constant = std::make_shared<opset8::Constant>(eps_const->get_element_type(),
                                                               eps_const->get_shape(),
                                                               normalized_fp16_min);
        copy_runtime_info(eps_const, new_constant);
        replace_node(eps_const, new_constant);
        return true;
    };

    auto m = std::make_shared<pattern::Matcher>(div_or_mul_to_negative_pow, matcher_name);
    register_matcher(m, callback);
}
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: it looks like these are not all of the patterns that are supported. It would be better to keep the pattern descriptions in the cpp file.