Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[IE][nGraph] resolve division by zero for FP16 #8676

Merged
merged 10 commits into from
Dec 7, 2021
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include <pot_transformations.hpp>
#include <pruning.hpp>
#include <transformations/common_optimizations/compress_float_constants.hpp>
#include <transformations/common_optimizations/division_by_zero_fp16_resolver.hpp>
#include <transformations/common_optimizations/mark_precision_sensitive_subgraphs.hpp>
#include <transformations/common_optimizations/moc_transformations.hpp>
#include <transformations/control_flow/unroll_tensor_iterator.hpp>
Expand Down Expand Up @@ -60,6 +61,7 @@ void InferenceEnginePython::GenerateMappingFile(InferenceEnginePython::IENetwork

void InferenceEnginePython::CompressModelTransformation(InferenceEnginePython::IENetwork network) {
ngraph::pass::Manager manager;
manager.register_pass<ov::pass::DivisionByZeroFP16Resolver>();
manager.register_pass<ov::pass::MarkPrecisionSensitiveSubgraphs>();
manager.register_pass<ov::pass::CompressFloatConstants>();
manager.run_passes(network.actual->getFunction());
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <gtest/gtest.h>

#include <string>
#include <memory>

#include <ngraph/function.hpp>
#include <openvino/opsets/opset4.hpp>
#include <openvino/pass/manager.hpp>
#include <transformations/common_optimizations/division_by_zero_fp16_resolver.hpp>
#include <transformations/init_node_info.hpp>

#include "common_test_utils/ngraph_test_utils.hpp"

using namespace testing;
using namespace ov;
// Smallest positive normalized fp16 value (2^-14). The tests below expect eps constants
// smaller than this to be replaced with exactly this value by DivisionByZeroFP16Resolver.
constexpr float normalized_fp16_min = 6.103515625e-05f; // fp16 minimal normalized value


TEST_F(TransformationTestsF, DivisionByZeroMinimalPattern) {
    // Builds numerator / (denominator + eps) with the given eps value.
    const auto make_graph = [](float eps) {
        auto numerator = std::make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(3));
        auto denominator = std::make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(3));
        auto eps_node = opset4::Constant::create(element::f32, Shape{1}, {eps});
        auto shifted = std::make_shared<opset4::Add>(denominator, eps_node);
        auto quotient = std::make_shared<opset4::Divide>(numerator, shifted);

        return std::make_shared<Function>(NodeVector{quotient}, ParameterVector{numerator, denominator});
    };

    // eps far below the fp16 normalized minimum: the pass is expected to raise it.
    const float eps_value = 1.e-12;

    function = make_graph(eps_value);
    manager.register_pass<pass::DivisionByZeroFP16Resolver>();

    // Reference graph: same topology, eps replaced by the fp16 normalized minimum.
    function_ref = make_graph(normalized_fp16_min);

    comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
    comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES);
}

TEST_F(TransformationTestsF, PowWithNegativeExponent) {
    // Builds multiplicand * Pow(base + eps, -1.77) with the given eps value.
    const auto make_graph = [](float eps) {
        auto multiplicand = std::make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(3));
        auto base = std::make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(3));
        auto eps_node = opset4::Constant::create(element::f32, Shape{1}, {eps});
        auto shifted = std::make_shared<opset4::Add>(base, eps_node);
        auto exponent = opset4::Constant::create(element::f32, Shape{1}, {-1.77});
        auto power = std::make_shared<opset4::Power>(shifted, exponent);
        auto product = std::make_shared<opset4::Multiply>(multiplicand, power);

        return std::make_shared<Function>(NodeVector{product}, ParameterVector{multiplicand, base});
    };

    // eps far below the fp16 normalized minimum: the pass is expected to raise it,
    // because a negative exponent makes the Power act as a division.
    const float eps_value = 1.e-12;

    function = make_graph(eps_value);
    manager.register_pass<pass::DivisionByZeroFP16Resolver>();

    // Reference graph: same topology, eps replaced by the fp16 normalized minimum.
    function_ref = make_graph(normalized_fp16_min);

    comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
    comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES);
}

TEST_F(TransformationTestsF, PowWithPositiveExponent) {
    // A positive exponent is not a division, so the graph should be left unchanged.

    // Builds multiplicand * Pow(base + eps, 1.77) with the given eps value.
    const auto make_graph = [](float eps) {
        auto multiplicand = std::make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(3));
        auto base = std::make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(3));
        auto eps_node = opset4::Constant::create(element::f32, Shape{1}, {eps});
        auto shifted = std::make_shared<opset4::Add>(base, eps_node);
        auto exponent = opset4::Constant::create(element::f32, Shape{1}, {1.77});
        auto power = std::make_shared<opset4::Power>(shifted, exponent);
        auto product = std::make_shared<opset4::Multiply>(multiplicand, power);

        return std::make_shared<Function>(NodeVector{product}, ParameterVector{multiplicand, base});
    };

    const float eps_value = 1.e-12;

    function = make_graph(eps_value);
    manager.register_pass<pass::DivisionByZeroFP16Resolver>();

    // Reference graph is identical to the input graph: eps keeps its original value.
    function_ref = make_graph(eps_value);

    comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
    comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES);
}

TEST_F(TransformationTestsF, DivisionByZeroMinimalPatternUnchanged) {
    // When eps_value is already greater than normalized_fp16_min the graph must stay unchanged.

    // Builds numerator / (denominator + eps) with the given eps value.
    const auto make_graph = [](float eps) {
        auto numerator = std::make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(3));
        auto denominator = std::make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(3));
        auto eps_node = opset4::Constant::create(element::f32, Shape{1}, {eps});
        auto shifted = std::make_shared<opset4::Add>(denominator, eps_node);
        auto quotient = std::make_shared<opset4::Divide>(numerator, shifted);

        return std::make_shared<Function>(NodeVector{quotient}, ParameterVector{numerator, denominator});
    };

    const float eps_value = 0.0001f;

    function = make_graph(eps_value);
    manager.register_pass<pass::DivisionByZeroFP16Resolver>();

    // Reference graph is identical to the input graph: eps keeps its original value.
    function_ref = make_graph(eps_value);

    comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
    comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES);
}

TEST_F(TransformationTestsF, DivisionByZeroInL2NormWithSqrtAndWithMax) {
    // Builds data / Sqrt(Maximum(ReduceSum(data^2, axes {0, 1}), eps)) with the given eps value.
    const auto make_graph = [](float eps) {
        auto data = std::make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(3));
        auto two = opset4::Constant::create(element::f32, Shape{}, {2.f});
        auto squared = std::make_shared<opset4::Power>(data, two);
        auto axes = opset4::Constant::create(element::i64, Shape{2}, {0, 1});
        auto sum_of_squares = std::make_shared<opset4::ReduceSum>(squared, axes);
        auto eps_node = opset4::Constant::create(element::f32, Shape{}, {eps});
        auto clamped = std::make_shared<opset4::Maximum>(sum_of_squares, eps_node);
        auto norm = std::make_shared<opset4::Sqrt>(clamped);
        auto normalized = std::make_shared<opset4::Divide>(data, norm);

        return std::make_shared<Function>(NodeVector{normalized}, ParameterVector{data});
    };

    // eps far below the fp16 normalized minimum: the pass is expected to raise it.
    const float eps_value = 1.e-12;

    function = make_graph(eps_value);
    manager.register_pass<pass::DivisionByZeroFP16Resolver>();

    // Reference graph: same topology, eps replaced by the fp16 normalized minimum.
    function_ref = make_graph(normalized_fp16_min);

    comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
    comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES);
}


TEST_F(TransformationTestsF, DivisionByZeroInL2NormWithSqrtAndWithAdd) {
    // Builds data / Sqrt(ReduceSum(data^2, axes {0, 1}) + eps) with the given eps value.
    const auto make_graph = [](float eps) {
        auto data = std::make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(3));
        auto two = opset4::Constant::create(element::f32, Shape{}, {2.f});
        auto squared = std::make_shared<opset4::Power>(data, two);
        auto axes = opset4::Constant::create(element::i64, Shape{2}, {0, 1});
        auto sum_of_squares = std::make_shared<opset4::ReduceSum>(squared, axes);
        auto eps_node = opset4::Constant::create(element::f32, Shape{1}, {eps});
        auto shifted = std::make_shared<opset4::Add>(sum_of_squares, eps_node);
        auto norm = std::make_shared<opset4::Sqrt>(shifted);
        auto normalized = std::make_shared<opset4::Divide>(data, norm);

        return std::make_shared<Function>(NodeVector{normalized}, ParameterVector{data});
    };

    // eps far below the fp16 normalized minimum: the pass is expected to raise it.
    const float eps_value = 1.e-12;

    function = make_graph(eps_value);
    manager.register_pass<pass::DivisionByZeroFP16Resolver>();

    // Reference graph: same topology, eps replaced by the fp16 normalized minimum.
    function_ref = make_graph(normalized_fp16_min);

    comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
    comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <utility>
#include <memory>

#include <transformations_visibility.hpp>
#include <ngraph/pass/graph_rewrite.hpp>
#include "ngraph/pattern/matcher.hpp"

namespace ov {
namespace pass {

// Forward declaration; the class is defined below, outside the namespace block,
// using its fully qualified name.
class TRANSFORMATIONS_API DivisionByZeroFP16Resolver;

} // namespace pass
} // namespace ov

/**
* @ingroup ie_transformation_common_api
* @brief: clamps eps into fp16 minimal normalized value in input_1/Maximum(input_2, eps); input_1/Add(input_2, eps);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: it looks like these are not all of the patterns that are supported. It would be better to keep the pattern descriptions in the cpp file.

* and input_1*Pow(Maximum[Add](input_2, eps), -z) patterns to prevent division by zero.
*
* eps must always be nonzero to prevent NaNs in such expressions when input_1 and input_2 happen to be zero simultaneously.
* In such patterns eps must be kept >= the fp16 minimal normalized value so that
* CompressFloatConstants does not cast it to zero during compression into f16.
*/
// Matcher pass that raises too-small eps constants found in division-like patterns
// to the fp16 minimal normalized value.
class ov::pass::DivisionByZeroFP16Resolver: public ngraph::pass::MatcherPass {
public:
// Declares the run-time type information for this pass.
NGRAPH_RTTI_DECLARATION;
// Builds the pattern and registers the matcher together with its callback.
DivisionByZeroFP16Resolver();
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "itt.hpp"
#include "transformations/common_optimizations/division_by_zero_fp16_resolver.hpp"
#include "transformations/utils/utils.hpp"

#include <memory>
#include <vector>

#include <openvino/opsets/opset8.hpp>
#include "ngraph/rt_info.hpp"
#include <openvino/pass/pattern/op/wrap_type.hpp>
#include <openvino/pass/pattern/op/or.hpp>

// Run-time type information for the pass, registered under the name "DivisionByZeroFP16Resolver".
NGRAPH_RTTI_DEFINITION(ov::pass::DivisionByZeroFP16Resolver, "DivisionByZeroFP16Resolver", 0);

// Smallest positive normalized fp16 value (2^-14). An eps constant whose values are all
// below this threshold is replaced with a constant filled with this value.
constexpr float normalized_fp16_min = 6.103515625e-05f; // fp16 minimal normalized value

using namespace ov;

/**
 * Builds the matcher for division-like expressions guarded by a small eps constant and
 * registers a callback that raises eps to the fp16 minimal normalized value.
 *
 * The callback returns true only when the eps constant was replaced. It leaves the graph
 * untouched (returns false) when:
 *  - the eps constant is not f32;
 *  - the Multiply/Power branch matched and the exponent constant is not f32 or has any
 *    non-negative value (the Power is then not a division);
 *  - any eps value is already >= normalized_fp16_min.
 */
ov::pass::DivisionByZeroFP16Resolver::DivisionByZeroFP16Resolver() {
    MATCHER_SCOPE(DivisionByZeroFP16Resolver);

    // to detect the following patterns where eps is used to prevent division by zero:
    // input_1/Maximum(input_2, eps)
    // input_1/Add(input_2, eps)
    // input_1/Sqrt(Maximum(input_2, eps))
    // input_1/Sqrt(Add(input_2, eps))
    // input_1*Pow(Maximum(input_2, eps), -z)
    // input_1*Pow(Add(input_2, eps), -z)
    auto input_1 = pattern::any_input();
    auto input_2 = pattern::any_input();

    auto eps_const_pattern = pattern::wrap_type<opset8::Constant>();
    auto max = std::make_shared<opset8::Maximum>(input_2, eps_const_pattern);
    auto add = std::make_shared<opset8::Add>(input_2, eps_const_pattern);
    auto max_or_add = std::make_shared<pattern::op::Or>(OutputVector{max, add});

    auto sqrt = std::make_shared<opset8::Sqrt>(max_or_add);
    auto sqrt_or_max_add = std::make_shared<pattern::op::Or>(OutputVector{max_or_add, sqrt});
    // whether is divided directly or after sqrt (e.g. in L2Norm after sqrt, in MVN is divided directly)
    auto divide = std::make_shared<opset8::Divide>(input_1, sqrt_or_max_add);

    auto pow_exp = pattern::wrap_type<opset8::Constant>();
    auto pow_pattern = std::make_shared<opset8::Power>(max_or_add, pow_exp);
    auto mul_pattern = std::make_shared<opset8::Multiply>(input_1, pow_pattern);
    auto div_or_mul_to_negative_pow = std::make_shared<pattern::op::Or>(OutputVector{divide, mul_pattern});

    matcher_pass_callback callback = [=](pattern::Matcher& m) {
        const auto& pattern_to_output = m.get_pattern_map();

        // Validate eps before touching any other constant: only f32 eps constants are handled,
        // so graphs in other precisions are rejected here before any buffer is read as float.
        const auto eps_const = std::dynamic_pointer_cast<opset8::Constant>(pattern_to_output.at(eps_const_pattern));
        if (!eps_const || eps_const->get_element_type() != ov::element::f32)
            return false;

        const auto mul = std::dynamic_pointer_cast<opset8::Multiply>(m.get_match_root());
        if (mul) {
            // pattern input_1*Pow(Maximum(input_2, eps), z) or input_1*Pow(Add(input_2, eps), z) is matched
            const auto pow_const = std::dynamic_pointer_cast<opset8::Constant>(pattern_to_output.at(pow_exp));
            // The exponent is read as float below, so it must actually be an f32 constant;
            // a buffer of another element type must not be interpreted through get_vector<float>().
            if (!pow_const || pow_const->get_element_type() != ov::element::f32)
                return false;
            for (float val : pow_const->get_vector<float>())
                if (val >= 0)  // continue only if exponent is negative (z < 0)
                    return false;
        }

        // Leave eps alone if any of its values is already representable as a normalized fp16 value.
        for (float val : eps_const->get_vector<float>())
            if (val >= normalized_fp16_min)
                return false;

        // Same type and shape as the original eps, filled with the fp16 minimal normalized value.
        auto new_constant = std::make_shared<opset8::Constant>(eps_const->get_element_type(),
                                                               eps_const->get_shape(),
                                                               normalized_fp16_min);
        copy_runtime_info(eps_const, new_constant);
        replace_node(eps_const, new_constant);
        return true;
    };

    auto m = std::make_shared<pattern::Matcher>(div_or_mul_to_negative_pow, matcher_name);
    register_matcher(m, callback);
}