Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[IE][nGraph] resolve division by zero for FP16 #8676

Merged
merged 10 commits into from
Dec 7, 2021
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include <pot_transformations.hpp>
#include <pruning.hpp>
#include <transformations/common_optimizations/compress_float_constants.hpp>
#include <transformations/common_optimizations/division_by_zero_fp16_resolver.hpp>
#include <transformations/common_optimizations/mark_precision_sensitive_subgraphs.hpp>
#include <transformations/common_optimizations/moc_transformations.hpp>
#include <transformations/control_flow/unroll_tensor_iterator.hpp>
Expand Down Expand Up @@ -60,6 +61,7 @@ void InferenceEnginePython::GenerateMappingFile(InferenceEnginePython::IENetwork

void InferenceEnginePython::CompressModelTransformation(InferenceEnginePython::IENetwork network) {
ngraph::pass::Manager manager;
manager.register_pass<ov::pass::DivisionByZeroFP16Resolver>();
manager.register_pass<ov::pass::MarkPrecisionSensitiveSubgraphs>();
manager.register_pass<ov::pass::CompressFloatConstants>();
manager.run_passes(network.actual->getFunction());
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <gtest/gtest.h>

#include <string>
#include <memory>

#include <ngraph/function.hpp>
#include <openvino/opsets/opset4.hpp>
#include <openvino/pass/manager.hpp>
#include <transformations/common_optimizations/division_by_zero_fp16_resolver.hpp>
#include <transformations/init_node_info.hpp>

#include "common_test_utils/ngraph_test_utils.hpp"

using namespace testing;
using namespace ov;
// Smallest positive normalized fp16 value (2^-14). The reference graphs in the tests below
// use it as the eps constant expected after the DivisionByZeroFP16Resolver pass has run.
constexpr float normalized_fp16_min = 6.103515625e-05f; // fp16 minimal normalized value


TEST_F(TransformationTestsF, DivisionByZeroMinimalPattern) {
    // eps far below the fp16 normalized minimum: the reference graph expects it to be
    // replaced with normalized_fp16_min.
    const float tiny_eps = 1.e-12;

    // Builds input_1 / (input_2 + eps) for the supplied eps value.
    const auto build_graph = [](float eps) {
        auto numerator = std::make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(3));
        auto denominator_in = std::make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(3));
        auto eps_node = opset4::Constant::create(element::f32, Shape{1}, {eps});
        auto shifted = std::make_shared<opset4::Add>(denominator_in, eps_node);
        auto quotient = std::make_shared<opset4::Divide>(numerator, shifted);

        return std::make_shared<Function>(NodeVector{quotient}, ParameterVector{numerator, denominator_in});
    };

    // Graph under test, followed by the pass that is run on it.
    function = build_graph(tiny_eps);
    manager.register_pass<pass::DivisionByZeroFP16Resolver>();

    // Expected graph: same topology, eps raised to the fp16 normalized minimum.
    function_ref = build_graph(normalized_fp16_min);

    comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
}

TEST_F(TransformationTestsF, PowWithNegativeExponent) {
    // A negative exponent makes Power act as a division, so the reference graph expects
    // the tiny eps to be replaced with normalized_fp16_min.
    const float tiny_eps = 1.e-12;

    // Builds input_1 * (input_2 + eps)^(-1.77) for the supplied eps value.
    const auto build_graph = [](float eps) {
        auto lhs = std::make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(3));
        auto base_in = std::make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(3));
        auto eps_node = opset4::Constant::create(element::f32, Shape{1}, {eps});
        auto shifted = std::make_shared<opset4::Add>(base_in, eps_node);
        auto exponent = opset4::Constant::create(element::f32, Shape{1}, {-1.77});
        auto powered = std::make_shared<opset4::Power>(shifted, exponent);
        auto product = std::make_shared<opset4::Multiply>(lhs, powered);

        return std::make_shared<Function>(NodeVector{product}, ParameterVector{lhs, base_in});
    };

    // Graph under test, followed by the pass that is run on it.
    function = build_graph(tiny_eps);
    manager.register_pass<pass::DivisionByZeroFP16Resolver>();

    // Expected graph: same topology, eps raised to the fp16 normalized minimum.
    function_ref = build_graph(normalized_fp16_min);

    comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
}

TEST_F(TransformationTestsF, PowWithPositiveExponent) {
    // With a positive exponent there is no division, so the graph should be left unchanged:
    // the reference graph is built with the very same eps.
    const float tiny_eps = 1.e-12;

    // Builds input_1 * (input_2 + eps)^(1.77) for the supplied eps value.
    const auto build_graph = [](float eps) {
        auto lhs = std::make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(3));
        auto base_in = std::make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(3));
        auto eps_node = opset4::Constant::create(element::f32, Shape{1}, {eps});
        auto shifted = std::make_shared<opset4::Add>(base_in, eps_node);
        auto exponent = opset4::Constant::create(element::f32, Shape{1}, {1.77});
        auto powered = std::make_shared<opset4::Power>(shifted, exponent);
        auto product = std::make_shared<opset4::Multiply>(lhs, powered);

        return std::make_shared<Function>(NodeVector{product}, ParameterVector{lhs, base_in});
    };

    // Graph under test, followed by the pass that is run on it.
    function = build_graph(tiny_eps);
    manager.register_pass<pass::DivisionByZeroFP16Resolver>();

    // Expected graph: identical to the input graph.
    function_ref = build_graph(tiny_eps);

    comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
    comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES);
}

TEST_F(TransformationTestsF, DivisionByZeroMinimalPatternUnchanged) {
    // eps is already greater than normalized_fp16_min, so the graph should be left
    // unchanged: the reference graph is built with the very same eps.
    const float safe_eps = 0.0001f;

    // Builds input_1 / (input_2 + eps) for the supplied eps value.
    const auto build_graph = [](float eps) {
        auto numerator = std::make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(3));
        auto denominator_in = std::make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(3));
        auto eps_node = opset4::Constant::create(element::f32, Shape{1}, {eps});
        auto shifted = std::make_shared<opset4::Add>(denominator_in, eps_node);
        auto quotient = std::make_shared<opset4::Divide>(numerator, shifted);

        return std::make_shared<Function>(NodeVector{quotient}, ParameterVector{numerator, denominator_in});
    };

    // Graph under test, followed by the pass that is run on it.
    function = build_graph(safe_eps);
    manager.register_pass<pass::DivisionByZeroFP16Resolver>();

    // Expected graph: identical to the input graph.
    function_ref = build_graph(safe_eps);

    comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
}

TEST_F(TransformationTestsF, DivisionByZeroWithMax) {
    // eps far below the fp16 normalized minimum: the reference graph expects it to be
    // replaced with normalized_fp16_min.
    const float tiny_eps = 1.e-12;

    // Builds x / sqrt(max(reduce_sum(x^2, axes {0, 1}), eps)) for the supplied eps value.
    const auto build_graph = [](float eps) {
        auto data = std::make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(3));
        auto two = opset4::Constant::create(element::f32, Shape{}, {2.f});
        auto squared = std::make_shared<opset4::Power>(data, two);
        auto reduce_axes = opset4::Constant::create(element::i64, Shape{2}, {0, 1});
        auto sum_of_squares = std::make_shared<opset4::ReduceSum>(squared, reduce_axes);
        auto eps_node = opset4::Constant::create(element::f32, Shape{}, {eps});
        auto clamped = std::make_shared<opset4::Maximum>(sum_of_squares, eps_node);
        auto norm = std::make_shared<opset4::Sqrt>(clamped);
        auto quotient = std::make_shared<opset4::Divide>(data, norm);

        return std::make_shared<Function>(NodeVector{quotient}, ParameterVector{data});
    };

    // Graph under test, followed by the pass that is run on it.
    function = build_graph(tiny_eps);
    manager.register_pass<pass::DivisionByZeroFP16Resolver>();

    // Expected graph: same topology, eps raised to the fp16 normalized minimum.
    function_ref = build_graph(normalized_fp16_min);

    comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
}


TEST_F(TransformationTestsF, DivisionByZeroWithAdd) {
    // eps far below the fp16 normalized minimum: the reference graph expects it to be
    // replaced with normalized_fp16_min.
    const float tiny_eps = 1.e-12;

    // Builds x / sqrt(reduce_sum(x^2, axes {0, 1}) + eps) for the supplied eps value.
    const auto build_graph = [](float eps) {
        auto data = std::make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(3));
        auto two = opset4::Constant::create(element::f32, Shape{}, {2.f});
        auto squared = std::make_shared<opset4::Power>(data, two);
        auto reduce_axes = opset4::Constant::create(element::i64, Shape{2}, {0, 1});
        auto sum_of_squares = std::make_shared<opset4::ReduceSum>(squared, reduce_axes);
        auto eps_node = opset4::Constant::create(element::f32, Shape{1}, {eps});
        auto shifted = std::make_shared<opset4::Add>(sum_of_squares, eps_node);
        auto norm = std::make_shared<opset4::Sqrt>(shifted);
        auto quotient = std::make_shared<opset4::Divide>(data, norm);

        return std::make_shared<Function>(NodeVector{quotient}, ParameterVector{data});
    };

    // Graph under test, followed by the pass that is run on it.
    function = build_graph(tiny_eps);
    manager.register_pass<pass::DivisionByZeroFP16Resolver>();

    // Expected graph: same topology, eps raised to the fp16 normalized minimum.
    function_ref = build_graph(normalized_fp16_min);

    comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <utility>
#include <memory>

#include <transformations_visibility.hpp>
#include <ngraph/pass/graph_rewrite.hpp>
#include "ngraph/pattern/matcher.hpp"

namespace ov {
namespace pass {

// Forward declaration; the class itself is defined below using its fully qualified name.
class TRANSFORMATIONS_API DivisionByZeroFP16Resolver;

} // namespace pass
} // namespace ov

/**
* @ingroup ie_transformation_common_api
* @brief Clamps eps to the fp16 minimal normalized value in input_1/Maximum(input_2, eps) and
* input_1/Add(input_2, eps) patterns to prevent division by zero.
*
* The same clamping is applied when Maximum(input_2, eps) or Add(input_2, eps) is raised to a
* constant power whose values are all negative, since such a Power acts as a division.
*
* eps must always be nonzero to prevent NaNs in such expressions when input_1 and input_2
* happen to be zero simultaneously. In these patterns eps is kept >= the fp16 minimal normalized
* value so that CompressFloatConstants does not turn it into zero during compression to f16.
* Only f32 eps constants whose values are all below that minimum are replaced.
*/
class ov::pass::DivisionByZeroFP16Resolver: public ngraph::pass::MatcherPass {
public:
// Declares the nGraph RTTI members for this pass.
NGRAPH_RTTI_DECLARATION;
// Builds the pattern and registers the matcher callback that rewrites the eps constant.
DivisionByZeroFP16Resolver();
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "itt.hpp"
#include "transformations/common_optimizations/division_by_zero_fp16_resolver.hpp"
#include "transformations/utils/utils.hpp"

#include <memory>
#include <vector>

#include <openvino/opsets/opset8.hpp>
#include "ngraph/rt_info.hpp"
#include <openvino/pass/pattern/op/wrap_type.hpp>
#include <openvino/pass/pattern/op/or.hpp>

// RTTI registration of the pass under the name "DivisionByZeroFP16Resolver", version 0.
NGRAPH_RTTI_DEFINITION(ov::pass::DivisionByZeroFP16Resolver, "DivisionByZeroFP16Resolver", 0);

// Smallest positive normalized fp16 value (2^-14). eps constants whose values are all below
// this threshold are replaced with it by the pass.
constexpr float normalized_fp16_min = 6.103515625e-05f; // fp16 minimal normalized value

using namespace ov;

/**
 * Builds the matcher for the pass.
 *
 * Matched patterns (eps is a Constant):
 *   - Divide(input_1, Maximum(input_2, eps))
 *   - Divide(input_1, Add(input_2, eps))
 *   - Power(Maximum(input_2, eps), exp_const) / Power(Add(input_2, eps), exp_const),
 *     accepted only when every value of exp_const is negative.
 *
 * When the eps constant is f32 and all of its values are below normalized_fp16_min,
 * it is replaced with a constant of the same type and shape filled with
 * normalized_fp16_min.
 */
ov::pass::DivisionByZeroFP16Resolver::DivisionByZeroFP16Resolver() {
    MATCHER_SCOPE(DivisionByZeroFP16Resolver);
    auto input_1 = pattern::any_input();
    auto input_2 = pattern::any_input();

    // Denominator candidates: Maximum(input_2, eps) or Add(input_2, eps).
    auto eps_const_pattern = pattern::wrap_type<opset8::Constant>();
    auto max = std::make_shared<opset8::Maximum>(input_2, eps_const_pattern);
    auto add = std::make_shared<opset8::Add>(input_2, eps_const_pattern);
    auto max_or_add = std::make_shared<pattern::op::Or>(OutputVector{max, add});

    // Consumers of the denominator: an explicit Divide or a Power with a constant exponent.
    auto divide = std::make_shared<opset8::Divide>(input_1, max_or_add);
    auto pow_exp = pattern::wrap_type<opset8::Constant>();
    auto pow_pattern = std::make_shared<opset8::Power>(max_or_add, pow_exp);
    auto div_or_pow = std::make_shared<pattern::op::Or>(OutputVector{divide, pow_pattern});

    matcher_pass_callback callback = [=](pattern::Matcher& m) {
        const auto& pattern_to_output = m.get_pattern_value_map();

        const auto pow = std::dynamic_pointer_cast<opset8::Power>(m.get_match_root());
        if (pow) {
            const auto pow_const = std::dynamic_pointer_cast<opset8::Constant>(pattern_to_output.at(pow_exp).get_node_shared_ptr());
            // The exponent is read as float below, so reject anything that is not an f32
            // constant instead of dereferencing a null pointer or misreading the buffer.
            if (!pow_const || pow_const->get_element_type() != ov::element::f32)
                return false;

            for (float val : pow_const->get_vector<float>())
                if (val >= 0)  // only for negative exponents
                    return false;
        }

        const auto eps_const = std::dynamic_pointer_cast<opset8::Constant>(pattern_to_output.at(eps_const_pattern).get_node_shared_ptr());
        if (!eps_const || eps_const->get_element_type() != ov::element::f32)
            return false;

        // Leave the graph untouched if any eps value is already at or above the fp16 minimum.
        for (float val : eps_const->get_vector<float>())
            if (val >= normalized_fp16_min)
                return false;

        // Same type and shape as the original eps, filled with the fp16 normalized minimum.
        auto new_constant = std::make_shared<opset8::Constant>(eps_const->get_element_type(),
                                                               eps_const->get_shape(),
                                                               normalized_fp16_min);
        copy_runtime_info(eps_const, new_constant);
        replace_node(eps_const, new_constant);
        return true;
    };

    auto m = std::make_shared<pattern::Matcher>(div_or_pow, matcher_name);
    register_matcher(m, callback);
}