[MO] relax condition for casting into FP16 #8083

Closed
1 change: 1 addition & 0 deletions model-optimizer/automation/package_BOM.txt
@@ -578,6 +578,7 @@ extensions/middle/DeleteControlFlowEdges.py
extensions/middle/DeleteNotExecutable.py
extensions/middle/dequantize_linear_resolver.py
extensions/middle/DilatedConvolution.py
extensions/middle/DivisionToZeroFP16Resolver.py
extensions/middle/EltwiseChecker.py
extensions/middle/EltwiseInputReshape.py
extensions/middle/FakeSplitOutputs.py
37 changes: 25 additions & 12 deletions model-optimizer/extensions/back/ChangeOutputTypeAttributes.py
@@ -8,12 +8,12 @@
from mo.back.replacement import BackReplacementPattern
from mo.graph.graph import Graph
from mo.graph.graph import Node
from mo.middle.passes.convert_data_type import data_type_str_to_np
from mo.middle.passes.convert_data_type import data_type_str_to_np, convert_blob
from mo.utils.error import Error

operations_with_data_type_attributes = {
'Cast': {'attr_name': 'dst_type', 'in_ports_to_check': (0,)},
'Range': {'attr_name': 'output_type', 'in_ports_to_check': (0, 1, 2)},
'Cast': {'attr_name': 'dst_type', 'in_ports_to_check': (0,), 'check_out_shape': False},
'Range': {'attr_name': 'output_type', 'in_ports_to_check': (0, 1, 2), 'check_out_shape': True},
}


@@ -27,7 +27,6 @@ class ChangeOutputTypeAttributes(BackReplacementPattern):
avoid floating point overflow.
"""
enabled = True
force_shape_inference = True

def run_after(self):
from extensions.back.MarkNodesWithShapeValues import MarkNodesWithShapeValues
@@ -73,17 +72,31 @@ def assert_that_is_castable_to_fp16(node: Node):
val = node.in_port(i).data.get_value()
if val is None:
return

if np.any(val > np.finfo(np.float16).max) or np.any(val < np.finfo(np.float16).min):
raise Error("Try to convert with --data_type=FP32 argument. "
"This model can not be converted to FP16 precision, since "
"'{}' node value {} exceeds FP16 allowed limits: [{}, {}]"
.format(node_name, val, np.finfo(np.float16).min, np.finfo(np.float16).max))
# these input values will be rewritten further on when node.infer() is re-run below,
# which is needed for Range
node.in_port(i).data.set_value(val.astype(np.float16))
node.in_node(i)['correct_data_type'] = True

original_output = node.out_port(0).data.get_value()
node.infer(node)
converted_blob, infinite_match_count, zero_match_count = convert_blob(original_output, np.float16)

if infinite_match_count:
# some models have -Inf values but are nevertheless inferred correctly in FP16,
# so we do not raise an Error here but show a warning instead
log.error("{} of {} elements of '{}' were clipped to infinity while converting into FP16. "
"This may lead to incorrect results during inference or may not be a problem, "
"depending on the model.".format(infinite_match_count, original_output.size, node_name),
extra={'is_warning': True})
if zero_match_count:
# some values are clipped to zero but are nevertheless inferred correctly
log.error("{} of {} elements of '{}' were clipped to zero while converting into FP16. "
"This may lead to incorrect results during inference or may not be a problem, "
"depending on the model.".format(zero_match_count, original_output.size, node_name),
extra={'is_warning': True})

if not operations_with_data_type_attributes[op_name]['check_out_shape']:
return

node.infer(node) # is needed for Range
casted_output = node.out_port(0).data.get_value()
original_output_len = len(original_output) if hasattr(original_output, '__len__') else None
casted_output_len = len(casted_output) if hasattr(casted_output, '__len__') else None
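Note: the warning path above relies on convert_blob returning clip counts. As a rough standalone sketch (plain numpy, not the actual convert_blob from mo.middle.passes.convert_data_type), the counting logic amounts to:

```python
import numpy as np

def count_fp16_clips(blob: np.ndarray):
    """Count values that overflow to infinity or underflow to zero in FP16."""
    converted = blob.astype(np.float16)
    # finite FP32 values beyond FP16's range ([-65504, 65504]) become +/-Inf
    inf_count = int(np.count_nonzero(np.isfinite(blob) & ~np.isfinite(converted)))
    # non-zero FP32 values below FP16's subnormal range collapse to zero
    zero_count = int(np.count_nonzero((blob != 0) & (converted == 0)))
    return converted, inf_count, zero_count

_, infs, zeros = count_fp16_clips(np.array([1e5, 1e-12, 3.0], dtype=np.float32))
assert infs == 1 and zeros == 1  # 1e5 overflows, 1e-12 underflows
```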
72 changes: 72 additions & 0 deletions model-optimizer/extensions/middle/DivisionToZeroFP16Resolver.py
@@ -0,0 +1,72 @@
# Copyright (C) 2018-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import logging as log

import numpy as np

from mo.graph.graph import Graph, Node
from mo.middle.replacement import MiddleReplacementPattern


class DivisionToZeroFP16Resolver(MiddleReplacementPattern):
"""
The patterns input_1/Maximum(input_2, eps) and input_1/Add(input_2, eps) are used
to prevent division by zero. But in FP32 networks eps is usually so
small (e.g. 1e-9, 1e-12, ...) that after casting to FP16 it collapses to zero,
which can lead to division by zero if input_2 is also zero.
To prevent that, we change eps to the smallest normal FP16 value in such patterns.
"""
enabled = True
graph_condition = [lambda graph: graph.graph['cmd_params'].compress_fp16]

def run_after(self):
from extensions.middle.fusings import Fusing
return [Fusing]

def run_before(self):
from extensions.middle.L2NormFusing import L2NormToNorm
return [L2NormToNorm]

def pattern(self):
return dict(
nodes=[
('input', dict(kind='data')),
('eps_or_input_data_1', dict(kind='data')), # one of these inputs is eps
('eps_or_input_data_2', dict(kind='data')),
('max_or_add', dict(kind='op', op=lambda x: x in ['Maximum', 'Add'])),
('max_or_add_data', dict(kind='data')),
('pow_exp', dict(kind='data', value=lambda x: np.all(x < 0) if x is not None else False)),
('pow', dict(kind='op', op='Pow')),
('pow_data', dict(kind='data')),
('multiplicative_inverse', dict(kind='op', op='Mul')),
],
edges=[
('eps_or_input_data_1', 'max_or_add'),
('eps_or_input_data_2', 'max_or_add'),
('max_or_add', 'max_or_add_data'),
('max_or_add_data', 'pow', {'in': 0}),
('pow_exp', 'pow', {'in': 1}),
('pow', 'pow_data'),
('pow_data', 'multiplicative_inverse'),
('input', 'multiplicative_inverse'),
]
)

def replace_pattern(self, graph: Graph, match: dict):
is_port_1_const = match['max_or_add'].in_port(1).get_source().node.soft_get('op') == 'Const'
port = 1 if is_port_1_const else 0

const_node = match['max_or_add'].in_port(port).get_source().node
value = const_node.value
# we use the smallest normal FP16 value because arithmetic on subnormal values is slower
fp16_smallest_positive = np.finfo(np.float16).tiny

if value is not None and np.all(value < fp16_smallest_positive):
new_eps = np.full_like(value, fp16_smallest_positive)
const_node.out_port(0).data.set_value(new_eps)

const_name = const_node.soft_get('name', const_node.id)
log.error("Changing value of constant '{}' from {} -> {} to "
"prevent division to zero when casted to FP16".format(const_name, value, new_eps),
extra={'is_warning': True})
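Note: a small numpy illustration (example values assumed) of the failure this pass prevents and why np.finfo(np.float16).tiny is a safe replacement eps:

```python
import numpy as np

x = np.float16(0.0)                    # denominator input that can reach zero
eps16 = np.float16(np.float32(1e-12))  # a typical FP32 eps collapses to 0.0 in FP16
print(np.float16(1.0) / np.maximum(x, eps16))     # inf: the division by zero we guard against

safe_eps = np.finfo(np.float16).tiny   # ~6.104e-05, the smallest normal FP16 value
print(np.float16(1.0) / np.maximum(x, safe_eps))  # ~16384, finite and representable in FP16
```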
13 changes: 7 additions & 6 deletions model-optimizer/extensions/middle/L2NormFusing.py
@@ -22,13 +22,14 @@ class L2NormToNorm(MiddleReplacementPattern):
enabled = True
force_clean_up = True

def run_after(self):
from extensions.middle.pass_separator import PreMiddleStart
return [PreMiddleStart]

def run_before(self):
from extensions.middle.pass_separator import MiddleStart
return [MiddleStart]
from extensions.middle.pass_separator import PostMiddleStart
return [PostMiddleStart]

def run_after(self):
from extensions.middle.DivisionToZeroFP16Resolver import DivisionToZeroFP16Resolver
# because DivisionToZeroFP16Resolver should match the Pow(x, -1)/Div part of L2Norm before it is fused
return [DivisionToZeroFP16Resolver]

def pattern(self):
return dict(
3 changes: 3 additions & 0 deletions model-optimizer/extensions/ops/range.py
@@ -70,6 +70,9 @@ def infer(node: Node):
start = node.in_port(0).data.get_value()
limit = node.in_port(1).data.get_value()
delta = node.in_port(2).data.get_value()
if start is not None and limit is not None and delta is not None and np.any(np.isinf([start, limit, delta])):
raise Error("Range node '{}' input values must be finite, but instead "
"they contain infinities: start={}, stop={}, step={}".format(name, start, limit, delta))

for input in (start, limit, delta):
if input is not None and not node.has_valid('output_type'):
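Note: a sketch (values assumed) of the failure mode the new finiteness check guards against — once an input overflows during FP16 conversion, the number of Range outputs is undefined:

```python
import numpy as np

start, delta = np.float16(0), np.float16(1)
limit = np.float16(np.float32(1e5))  # overflows FP16 max (65504) and becomes +Inf
assert np.isinf(limit)
# the output size would be ceil((limit - start) / delta), which is infinite here,
# so shape inference must stop with a clear error instead of failing later
```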
8 changes: 4 additions & 4 deletions model-optimizer/extensions/ops/select.py
@@ -3,7 +3,7 @@

import numpy as np

from mo.front.common.partial_infer.utils import compatible_shapes, dynamic_dimension, shape_array, is_fully_defined
from mo.front.common.partial_infer.utils import compatible_shapes, dynamic_dimension, shape_array, is_fully_defined, compatible_dims
from mo.graph.graph import Node, Graph, Error
from mo.ops.op import Op
from mo.utils.broadcasting import bi_directional_shape_broadcasting, bi_directional_broadcasting
@@ -55,12 +55,12 @@ def infer(node: Node):
# but by adding ones to the end we can achieve numpy compatibility, as in transformation SelectBroadcast.py
if node.has_valid('format') and node['format'] == 'tf' and len(condition_shape) == 1:
# https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/array_ops.py#L4596-L4598
msg_tf = "In Select node '{}' if 'condition' is a 1D tensor then it's size " \
"must be matching with the first dimension of then/else branches. " \
msg_tf = "In Select node '{}' if 'condition' is a 1D tensor then sizes " \
"must be compatible with the first dimension of then/else branches. " \
"But instead got: cond_shape={}, then_shape={}, else_shape={}".format(
node_name, condition_shape, a_shape, b_shape)

assert condition_shape[0] == output_shape[0], msg_tf
assert compatible_dims(condition_shape[0], output_shape[0]), msg_tf
condition_shape = np.concatenate((condition_shape, np.ones(len(output_shape) - 1)))

output_shape = bi_directional_shape_broadcasting(output_shape, condition_shape)
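Note: the switch from strict equality to compatible_dims is assumed to make the check tolerant of dynamic dimensions; roughly, the intended semantics:

```python
# hypothetical usage sketch; compatible_dims and dynamic_dimension come from
# mo.front.common.partial_infer.utils, as imported in the diff above
from mo.front.common.partial_infer.utils import compatible_dims, dynamic_dimension

assert compatible_dims(42, 42)                 # equal static dims are compatible
assert compatible_dims(dynamic_dimension, 42)  # a dynamic dim matches any size
assert not compatible_dims(42, 100)            # differing static dims are not
```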
8 changes: 5 additions & 3 deletions model-optimizer/mo/middle/passes/convert_data_type.py
@@ -126,11 +126,13 @@ def convert_node_blobs(graph: Graph, node: Node, data_type: type):
if finite_match_count:
log.error(
("{} elements of {} were clipped to infinity while converting a blob for node [{}] to {}. " +
refer_to_faq_msg(76)).format(finite_match_count, blob.size, consumers, data_type))
refer_to_faq_msg(76)).format(finite_match_count, blob.size, consumers, data_type),
extra={'is_warning': True})
if zero_match_count:
log.warning(
log.error(
("{} elements of {} were clipped to zero while converting a blob for node [{}] to {}. " +
refer_to_faq_msg(77)).format(zero_match_count, blob.size, consumers, data_type))
refer_to_faq_msg(77)).format(zero_match_count, blob.size, consumers, data_type),
extra={'is_warning': True})

node.value = new_blob
# for the constant node need to propagate the converted value to the node output because there is a fake
@@ -57,12 +57,16 @@ def test_cast_correct_case(self):
def test_cast_out_of_fp16_max(self):
input_data = np.array([0, 100000, 4, 9, 0])
graph, graph_ref = build_cast_test_graphs(input_data, dst_type_str='FP16')
self.assertRaises(Error, ChangeOutputTypeAttributes().find_and_replace_pattern, graph)
with self.assertLogs() as captured:
ChangeOutputTypeAttributes().find_and_replace_pattern(graph)
self.assertRegex(str(captured.output), 'were clipped to infinity')

def test_cast_out_of_fp16_min(self):
input_data = np.array([0, -100000, 4, 9, 0])
graph, graph_ref = build_cast_test_graphs(input_data, dst_type_str='FP16')
self.assertRaises(Error, ChangeOutputTypeAttributes().find_and_replace_pattern, graph)
with self.assertLogs() as captured:
ChangeOutputTypeAttributes().find_and_replace_pattern(graph)
self.assertRegex(str(captured.output), 'were clipped to infinity')


def build_range_test_graphs(start=0, limit=10, delta=1, dst_type_str='FP16',
@@ -0,0 +1,115 @@
# Copyright (C) 2018-2021 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import unittest

import numpy as np

from extensions.middle.DivisionToZeroFP16Resolver import DivisionToZeroFP16Resolver
from mo.front.common.partial_infer.utils import shape_array
from mo.graph.graph import Node
from unit_tests.utils.graph import build_graph, result, regular_op_with_empty_data, connect, shaped_parameter, \
valued_const_with_data


class DivisionToZeroFP16ResolverTests(unittest.TestCase):

def test_division_maximum(self):
self.build_and_test_division_graph(eps=np.array(1e-12), pow_value=np.array(-1), preventing_type='Maximum')

def test_division_add(self):
self.build_and_test_division_graph(eps=np.array(1e-12), pow_value=np.array(-1), preventing_type='Add')

def test_division_arbitrary_negative_pow_1(self):
self.build_and_test_division_graph(eps=np.array(1e-12), pow_value=np.array(-1/2), preventing_type='Add')

def test_division_arbitrary_negative_pow_2(self):
self.build_and_test_division_graph(eps=np.array(1e-12), pow_value=np.array(-0.2), preventing_type='Add')

def test_division_eps_as_array_1(self):
self.build_and_test_division_graph(eps=np.array([1e-12, 1e-12]), pow_value=np.array(-1), preventing_type='Add')

def test_division_eps_as_array_2(self):
self.build_and_test_division_graph(eps=np.array([1e-12]), pow_value=np.array(-1), preventing_type='Add')

# in this case the graph should not be changed and eps stays at 1e-2,
# so build_and_test_division_graph is expected to raise an AssertionError
def test_division_graph_not_changed_1(self):
with self.assertRaises(AssertionError):
self.build_and_test_division_graph(eps=np.array(1e-2), pow_value=np.array(-1), preventing_type='Maximum')

# if at least one value is greater than the smallest normal FP16 value,
# the graph should not be changed and eps stays at [1e-2, 1e-12],
# so build_and_test_division_graph is expected to raise an AssertionError
def test_division_graph_not_changed_2(self):
with self.assertRaises(AssertionError):
self.build_and_test_division_graph(eps=np.array([1e-2, 1e-12]), pow_value=np.array(-1), preventing_type='Maximum')

def build_and_test_division_graph(self, eps, pow_value, preventing_type):
nodes = {
**shaped_parameter('input_1', shape_array((1, 3, 10, 10))),
**shaped_parameter('input_2', shape_array((1, 3, 10, 10))),
**regular_op_with_empty_data(preventing_type, {'type': preventing_type, 'op': preventing_type}),
**regular_op_with_empty_data('negative_pow', {'type': 'Pow', 'op': 'Pow'}),
**regular_op_with_empty_data('mul', {'type': 'Mul', 'op': 'Mul'}),

**valued_const_with_data('negative_pow_const', pow_value),
**valued_const_with_data('eps', eps),
**result('res'),
}

edges = [
*connect('input_2', '0:' + preventing_type),
*connect('eps', '1:' + preventing_type),
*connect(preventing_type, '0:negative_pow'),
*connect('negative_pow_const', '1:negative_pow'),
*connect('negative_pow', '1:mul'),
*connect('input_1', '0:mul'),
*connect('mul', 'res'),
]
graph = build_graph(nodes, edges)
graph.graph['cmd_params'].compress_fp16 = True

DivisionToZeroFP16Resolver().find_and_replace_pattern(graph)

self.assertTrue(np.all(Node(graph, 'eps').value == np.finfo(np.float16).tiny))

def test_l2_norm(self):
nodes = {
**shaped_parameter('input', shape_array((1, 3, 10, 10))),
**regular_op_with_empty_data('square', {'type': 'Pow', 'op': 'Pow'}),
**regular_op_with_empty_data('sum', {'type': 'ReduceSum', 'op': 'ReduceSum'}),
**regular_op_with_empty_data('max', {'type': 'Maximum', 'op': 'Maximum'}),
**regular_op_with_empty_data('rsqrt', {'type': 'Pow', 'op': 'Pow'}),
**regular_op_with_empty_data('l2norm', {'type': 'Mul', 'op': 'Mul'}),

**valued_const_with_data('rsqrt_pow_const', np.array(-1 / 2)),
**valued_const_with_data('square_pow', np.array(2)),
**valued_const_with_data('eps', np.array(1e-12)),
**result('res'),
}

edges = [
*connect('input:0', '0:square'),
*connect('square_pow', '1:square'),
*connect('square', 'sum'),
*connect('sum', '0:max'),
*connect('eps', '1:max'),
*connect('max', '0:rsqrt'),
*connect('rsqrt_pow_const', '1:rsqrt'),
*connect('rsqrt', '0:l2norm'),
*connect('input:0', '1:l2norm', skip_data=True),
*connect('l2norm', 'res'),
]
graph = build_graph(nodes, edges)
graph.graph['cmd_params'].compress_fp16 = True

DivisionToZeroFP16Resolver().find_and_replace_pattern(graph)

self.assertTrue(np.all(Node(graph, 'eps').value == np.finfo(np.float16).tiny))
12 changes: 6 additions & 6 deletions model-optimizer/unit_tests/extensions/ops/select_test.py
@@ -4,7 +4,7 @@
import unittest

import numpy as np

import re
from extensions.ops.select import Select
from mo.front.common.partial_infer.utils import dynamic_dimension, shape_array, dynamic_dimension_value
from mo.front.common.partial_infer.utils import strict_compare_tensors, int64_array
@@ -273,12 +273,12 @@ def test_select_infer_tf_condition(self):
self.assertTrue(flag, msg)

def test_select_infer_tf_condition_assert_raises(self):
with self.assertRaisesRegex(AssertionError, "if 'condition' is a 1D tensor then it's size"):
with self.assertRaisesRegex(AssertionError, r"In Select node .*if 'condition' is a 1D tensor then"):
self.build_select_graph_and_infer(condition_value=None, condition_shape=shape_array([42]),
then_value=None, then_shape=shape_array([100, 20]),
else_value=None, else_shape=shape_array([100, 20]),
out_value=None, out_shape=shape_array([100, 20]),
auto_broadcast='numpy', fw_format='tf')

def test_select_infer_assert_pdpd(self):
with self.assertRaisesRegex(Error, "PDPD broadcasting rule is not implemented yet"):