diff --git a/.ci/azure/linux.yml b/.ci/azure/linux.yml index 7b1ee18d792d74..f61bb85dfb1850 100644 --- a/.ci/azure/linux.yml +++ b/.ci/azure/linux.yml @@ -88,6 +88,11 @@ jobs: python3 -m pip install -r $(REPO_DIR)/inference-engine/ie_bridges/python/wheel/requirements-dev.txt # For running Python API tests python3 -m pip install -r $(REPO_DIR)/inference-engine/ie_bridges/python/src/requirements-dev.txt + # For running nGraph unit tests dependent on Python frameworks + python3 -m pip install -r $(REPO_DIR)/ngraph/test/requirements_test.txt + # For MO unit tests + python3 -m pip install -r $(REPO_DIR)/model-optimizer/requirements.txt + python3 -m pip install -r $(REPO_DIR)/model-optimizer/requirements_dev.txt # Speed up build wget https://github.com/ninja-build/ninja/releases/download/v1.10.0/ninja-linux.zip unzip ninja-linux.zip @@ -109,6 +114,7 @@ jobs: -DENABLE_WHEEL=ON -DENABLE_TESTS=ON -DNGRAPH_ONNX_IMPORT_ENABLE=ON + -DNGRAPH_ONNX_FRONTEND_ENABLE=ON -DENABLE_FASTER_BUILD=ON -DENABLE_STRICT_DEPENDENCIES=OFF -DIE_EXTRA_MODULES=$(OPENVINO_CONTRIB_REPO_DIR)/modules @@ -149,7 +155,15 @@ jobs: workingDirectory: $(BUILD_SAMPLES_DIR) displayName: 'Build c samples' - - script: . $(SETUPVARS) && $(INSTALL_TEST_DIR)/unit-test --gtest_print_time=1 --gtest_filter=-backend_api.config_unsupported:*IE_GPU* --gtest_output=xml:TEST-NGraphUT.xml + - script: | + export MO_ROOT=$(INSTALL_DIR)/deployment_tools/model_optimizer + . $(SETUPVARS) -pyver 3.6 && python3 -m pytest -s $(INSTALL_DIR)/deployment_tools/model_optimizer/unit_tests --junitxml=TEST-ModelOptimizer.xml + displayName: 'Model Optimizer UT' + continueOnError: false + + - script: | + export FE_TEST_MODELS=$(INSTALL_DIR)/tests + . $(SETUPVARS) && $(INSTALL_TEST_DIR)/unit-test --gtest_print_time=1 --gtest_filter=-backend_api.config_unsupported:*IE_GPU* --gtest_output=xml:TEST-NGraphUT.xml displayName: 'nGraph UT' continueOnError: false diff --git a/.ci/azure/linux_onnxruntime.yml b/.ci/azure/linux_onnxruntime.yml index fce8fdddcc4f91..a2bfee8c70ac3e 100644 --- a/.ci/azure/linux_onnxruntime.yml +++ b/.ci/azure/linux_onnxruntime.yml @@ -95,6 +95,7 @@ jobs: -DENABLE_SAMPLES=OFF -DENABLE_SPEECH_DEMO=OFF -DNGRAPH_ONNX_IMPORT_ENABLE=ON + -DNGRAPH_ONNX_FRONTEND_ENABLE=ON -DNGRAPH_DEBUG_ENABLE=OFF $(REPO_DIR) workingDirectory: $(BUILD_DIR) diff --git a/.ci/azure/mac.yml b/.ci/azure/mac.yml index 04d4c16ea23344..90fc812bbaa36c 100644 --- a/.ci/azure/mac.yml +++ b/.ci/azure/mac.yml @@ -87,9 +87,6 @@ jobs: export PATH="/usr/local/opt/cython/bin:$PATH" export CC=gcc export CXX=g++ - # Disable errors with Ninja - export CXXFLAGS="-Wno-error=unused-command-line-argument" - export CFLAGS="-Wno-error=unused-command-line-argument" cmake -GNinja -DVERBOSE_BUILD=ON -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) -DENABLE_PYTHON=ON -DENABLE_TESTS=ON -DENABLE_STRICT_DEPENDENCIES=OFF -DIE_EXTRA_MODULES=$(OPENVINO_CONTRIB_REPO_DIR)/modules $(REPO_DIR) workingDirectory: $(BUILD_DIR) displayName: 'CMake' diff --git a/.ci/openvino-onnx/Dockerfile b/.ci/openvino-onnx/Dockerfile index 9b0f48cf66cc3e..315598225627e0 100644 --- a/.ci/openvino-onnx/Dockerfile +++ b/.ci/openvino-onnx/Dockerfile @@ -69,6 +69,7 @@ RUN cmake .. 
\ -DENABLE_PYTHON=ON \ -DPYTHON_EXECUTABLE=/usr/bin/python3 \ -DNGRAPH_ONNX_IMPORT_ENABLE=ON \ + -DNGRAPH_ONNX_FRONTEND_ENABLE=ON \ -DNGRAPH_DEBUG_ENABLE=OFF \ -DCMAKE_INSTALL_PREFIX=/openvino/dist \ -DNGRAPH_USE_PROTOBUF_LITE=${PROTOBUF_LITE} diff --git a/.github/workflows/code_style.yml b/.github/workflows/code_style.yml index 607fe2cb64ae1a..7969cf13aa15e9 100644 --- a/.github/workflows/code_style.yml +++ b/.github/workflows/code_style.yml @@ -25,7 +25,7 @@ jobs: run: | mkdir build cd build - cmake -DENABLE_PYTHON=ON -DENABLE_PROFILING_ITT=ON -DSELECTIVE_BUILD=COLLECT .. + cmake -DENABLE_PYTHON=ON -DENABLE_TESTS=ON -DENABLE_PROFILING_ITT=ON -DSELECTIVE_BUILD=COLLECT .. - name: Check code style run: cmake --build build --target clang_format_check_all diff --git a/cmake/coverage.cmake b/cmake/coverage.cmake index 60c137337b3173..4d8976e0a80beb 100644 --- a/cmake/coverage.cmake +++ b/cmake/coverage.cmake @@ -92,9 +92,15 @@ ie_coverage_genhtml(INFO_FILE "ngraph" if(NGRAPH_ONNX_IMPORT_ENABLE) ie_coverage_extract(INPUT "openvino" OUTPUT "onnx_importer" - PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/ngraph/frontend/onnx_common*" - "${OV_COVERAGE_BASE_DIRECTORY}/ngraph/frontend/onnx_editor*" - "${OV_COVERAGE_BASE_DIRECTORY}/ngraph/frontend/onnx_import*") + PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/ngraph/frontend/onnx/onnx_common*" + "${OV_COVERAGE_BASE_DIRECTORY}/ngraph/frontend/onnx/onnx_import*") ie_coverage_genhtml(INFO_FILE "onnx_importer" PREFIX "${OV_COVERAGE_BASE_DIRECTORY}") endif() + +if(NGRAPH_ONNX_FRONTEND_ENABLE) + ie_coverage_extract(INPUT "openvino" OUTPUT "onnx_ngraph_frontend" + PATTERNS "${OV_COVERAGE_BASE_DIRECTORY}/ngraph/frontend/onnx/frontend*") + ie_coverage_genhtml(INFO_FILE "onnx_ngraph_frontend" + PREFIX "${OV_COVERAGE_BASE_DIRECTORY}") +endif() diff --git a/cmake/developer_package/compile_flags/os_flags.cmake b/cmake/developer_package/compile_flags/os_flags.cmake index 7b5cc66d3e3434..0fdbf79e9ec7f2 100644 --- a/cmake/developer_package/compile_flags/os_flags.cmake +++ b/cmake/developer_package/compile_flags/os_flags.cmake @@ -68,13 +68,13 @@ function(ie_sse42_optimization_flags flags) if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") # No such option for MSVC 2019 elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Intel") - set(${flags} /arch:SSE4.2 /QxSSE4.2 PARENT_SCOPE) + set(${flags} /QxSSE4.2 PARENT_SCOPE) else() message(WARNING "Unsupported CXX compiler ${CMAKE_CXX_COMPILER_ID}") endif() else() if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel") - set(${flags} -msse4.2 -xSSE4.2 PARENT_SCOPE) + set(${flags} -xSSE4.2 PARENT_SCOPE) else() set(${flags} -msse4.2 PARENT_SCOPE) endif() @@ -95,7 +95,7 @@ function(ie_avx2_optimization_flags flags) endif() else() if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel") - set(${flags} -march=core-avx2 -xCORE-AVX2 -mtune=core-avx2 PARENT_SCOPE) + set(${flags} -xCORE-AVX2 PARENT_SCOPE) else() set(${flags} -mavx2 -mfma PARENT_SCOPE) endif() @@ -152,6 +152,24 @@ function(ie_arm_neon_optimization_flags flags) endif() endfunction() +# +# Disables all warnings for 3rd party targets +# +function(ov_disable_all_warnings) + foreach(target IN LISTS ARGN) + if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") + target_compile_options(${target} PRIVATE /WX-) + elseif(CMAKE_COMPILER_IS_GNUCXX OR OV_COMPILER_IS_CLANG) + target_compile_options(${target} PRIVATE -w) + elseif(UNIX AND CMAKE_CXX_COMPILER_ID STREQUAL "Intel") + # 193: zero used for undefined preprocessing identifier "XXX" + # 1011: missing return statement at end of non-void function "XXX" + # 2415: variable "xxx" of static storage 
duration was declared but never referenced + target_compile_options(${target} PRIVATE -diag-disable=warn,193,1011,2415) + endif() + endforeach() +endfunction() + # # Enables Link Time Optimization compilation # @@ -286,15 +304,12 @@ else() ie_add_compiler_flags(-Wreturn-type) ie_add_compiler_flags(-Wunused-variable) - # Disable noisy warnings - if (CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang") ie_add_compiler_flags(-Wswitch) elseif(UNIX) ie_add_compiler_flags(-Wuninitialized -Winit-self) if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") - ie_add_compiler_flags(-Wno-error=switch - -Winconsistent-missing-override) + ie_add_compiler_flags(-Winconsistent-missing-override) else() ie_add_compiler_flags(-Wmaybe-uninitialized) check_cxx_compiler_flag("-Wsuggest-override" SUGGEST_OVERRIDE_SUPPORTED) @@ -304,10 +319,11 @@ else() endif() endif() + # Disable noisy warnings + if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel") - ie_add_compiler_flags(-diag-disable=remark) - # noisy warnings from Intel Compiler 19.1.1.217 20200306 - ie_add_compiler_flags(-diag-disable=2196) + # 177: function "XXX" was declared but never referenced + ie_add_compiler_flags(-diag-disable=remark,177,2196) endif() # Linker flags @@ -319,7 +335,6 @@ else() elseif(LINUX) set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--gc-sections -Wl,--exclude-libs,ALL") set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} -Wl,--gc-sections -Wl,--exclude-libs,ALL") - set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--gc-sections -Wl,--exclude-libs,ALL") endif() endif() diff --git a/cmake/developer_package/compile_flags/sanitizer.cmake b/cmake/developer_package/compile_flags/sanitizer.cmake index ef71780c0f169b..35343b129f3a34 100644 --- a/cmake/developer_package/compile_flags/sanitizer.cmake +++ b/cmake/developer_package/compile_flags/sanitizer.cmake @@ -34,13 +34,13 @@ endif() # common sanitizer options if (DEFINED SANITIZER_COMPILER_FLAGS) # ensure sumbols are present - set(SANITIZER_COMPILER_FLAGS "-g -fno-omit-frame-pointer") + set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} -g -fno-omit-frame-pointer") # prevent unloading libraries at runtime, so sanitizer can resolve their symbols set(SANITIZER_LINKER_FLAGS "${SANITIZER_LINKER_FLAGS} -Wl,-z,nodelete") if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") set(SANITIZER_LINKER_FLAGS "${SANITIZER_LINKER_FLAGS} -fuse-ld=gold") - elseif(CMAKE_CXX_COMPILER_ID MATCHES "^(Apple)?Clang$" AND NOT WIN32) + elseif(OV_COMPILER_IS_CLANG AND NOT WIN32) if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 8.0) set(SANITIZER_LINKER_FLAGS "${SANITIZER_LINKER_FLAGS} -fuse-ld=lld") endif() diff --git a/cmake/developer_package/compile_flags/sdl.cmake b/cmake/developer_package/compile_flags/sdl.cmake index 10a1e86ad6d48f..7690a9031d864a 100644 --- a/cmake/developer_package/compile_flags/sdl.cmake +++ b/cmake/developer_package/compile_flags/sdl.cmake @@ -23,7 +23,7 @@ if (CMAKE_BUILD_TYPE STREQUAL "Release") if (NOT ENABLE_SANITIZER) set(IE_C_CXX_FLAGS "${IE_C_CXX_FLAGS} -s") endif() - elseif(CMAKE_CXX_COMPILER_ID MATCHES "^(Apple)?Clang$") + elseif(OV_COMPILER_IS_CLANG) set(IE_C_CXX_FLAGS "${IE_C_CXX_FLAGS} -fstack-protector-all") elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Intel") if (NOT ENABLE_SANITIZER) diff --git a/cmake/developer_package/features.cmake b/cmake/developer_package/features.cmake index 487dea8c7e382a..33e3530bac3359 100644 --- a/cmake/developer_package/features.cmake +++ b/cmake/developer_package/features.cmake @@ -58,7 +58,7 @@ ie_option (VERBOSE_BUILD "shows extra information 
about build" OFF) ie_option (ENABLE_UNSAFE_LOCATIONS "skip check for MD5 for dependency" OFF) -ie_dependent_option (ENABLE_FUZZING "instrument build for fuzzing" OFF "CMAKE_CXX_COMPILER_ID MATCHES ^(Apple)?Clang$; NOT WIN32" OFF) +ie_dependent_option (ENABLE_FUZZING "instrument build for fuzzing" OFF "OV_COMPILER_IS_CLANG; NOT WIN32" OFF) # # Check features diff --git a/cmake/developer_package/packaging.cmake b/cmake/developer_package/packaging.cmake index 4cb21210d4a166..4095a16157c068 100644 --- a/cmake/developer_package/packaging.cmake +++ b/cmake/developer_package/packaging.cmake @@ -53,7 +53,9 @@ macro(ie_cpack) set(CPACK_PACKAGE_VENDOR "Intel Corporation") set(CPACK_VERBATIM_VARIABLES ON) set(CPACK_COMPONENTS_ALL ${ARGN}) - set(CPACK_STRIP_FILES ON) + if (NOT DEFINED CPACK_STRIP_FILES) + set(CPACK_STRIP_FILES ON) + endif() set(CPACK_THREADS 8) string(REPLACE "/" "_" CPACK_PACKAGE_VERSION "${CI_BUILD_NUMBER}") diff --git a/cmake/developer_package/target_flags.cmake b/cmake/developer_package/target_flags.cmake index 181c4dd4187e1b..d4fd9837647005 100644 --- a/cmake/developer_package/target_flags.cmake +++ b/cmake/developer_package/target_flags.cmake @@ -55,3 +55,9 @@ endif() if(UNIX AND NOT APPLE) set(LINUX ON) endif() + +if(CMAKE_CXX_COMPILER_ID MATCHES "^(Apple)?Clang$") + set(OV_COMPILER_IS_CLANG ON) +else() + set(OV_COMPILER_IS_CLANG OFF) +endif() diff --git a/cmake/features.cmake b/cmake/features.cmake index ea32a7a42fe822..b7e23ee9226747 100644 --- a/cmake/features.cmake +++ b/cmake/features.cmake @@ -38,8 +38,6 @@ ie_dependent_option (ENABLE_PYTHON "enables ie python bridge build" OFF "PYTHONL find_package(PythonInterp 3 QUIET) ie_dependent_option (ENABLE_DOCS "Build docs using Doxygen" OFF "PYTHONINTERP_FOUND" OFF) -ie_option (ENABLE_SYSTEM_PUGIXML "use the system copy of pugixml" OFF) - # # Inference Engine specific options # @@ -112,7 +110,11 @@ ie_dependent_option(ENABLE_TBB_RELEASE_ONLY "Only Release TBB libraries are link ie_option (ENABLE_SYSTEM_PUGIXML "use the system copy of pugixml" OFF) -ie_option (ENABLE_CPU_DEBUG_CAPS "enable CPU debug capabilities at runtime" OFF) +ie_option (ENABLE_DEBUG_CAPS "enable OpenVINO debug capabilities at runtime" OFF) + +ie_dependent_option (ENABLE_GPU_DEBUG_CAPS "enable GPU debug capabilities at runtime" ON "ENABLE_DEBUG_CAPS" OFF) + +ie_dependent_option (ENABLE_CPU_DEBUG_CAPS "enable CPU debug capabilities at runtime" ON "ENABLE_DEBUG_CAPS" OFF) if(ANDROID OR WINDOWS_STORE OR (MSVC AND (ARM OR AARCH64))) set(protoc_available OFF) @@ -121,10 +123,12 @@ else() endif() ie_dependent_option(NGRAPH_ONNX_IMPORT_ENABLE "Enable ONNX importer" ON "protoc_available" OFF) -ie_dependent_option(NGRAPH_ONNX_EDITOR_ENABLE "Enable ONNX Editor" ON "NGRAPH_ONNX_IMPORT_ENABLE" OFF) +ie_dependent_option(NGRAPH_ONNX_FRONTEND_ENABLE "Enable ONNX FrontEnd" OFF "NGRAPH_ONNX_IMPORT_ENABLE" OFF) ie_dependent_option(NGRAPH_PDPD_FRONTEND_ENABLE "Enable PaddlePaddle FrontEnd" ON "protoc_available" OFF) ie_dependent_option(NGRAPH_USE_PROTOBUF_LITE "Compiles and links with protobuf-lite" OFF "NGRAPH_ONNX_IMPORT_ENABLE OR NGRAPH_PDPD_FRONTEND_ENABLE" OFF) +ie_dependent_option(NGRAPH_USE_SYSTEM_PROTOBUF "Use system protobuf" OFF + "NGRAPH_ONNX_IMPORT_ENABLE OR NGRAPH_PDPD_FRONTEND_ENABLE" OFF) ie_dependent_option(NGRAPH_UNIT_TEST_ENABLE "Enables ngraph unit tests" ON "ENABLE_TESTS;NOT ANDROID" OFF) ie_dependent_option(NGRAPH_UNIT_TEST_BACKENDS_ENABLE "Control the building of unit tests using backends" ON "NGRAPH_UNIT_TEST_ENABLE" OFF) diff --git 
a/cmake/templates/InferenceEngineConfig.cmake.in b/cmake/templates/InferenceEngineConfig.cmake.in
index 261edbf3d730f3..43408483f9af6e 100644
--- a/cmake/templates/InferenceEngineConfig.cmake.in
+++ b/cmake/templates/InferenceEngineConfig.cmake.in
@@ -73,6 +73,10 @@ function(_ie_target_no_deprecation_error)
         else()
             set(flags "-Wno-error=deprecated-declarations")
         endif()
+        if(CMAKE_CROSSCOMPILING)
+            set_target_properties(${ARGV} PROPERTIES
+                                  INTERFACE_LINK_OPTIONS "-Wl,--allow-shlib-undefined")
+        endif()
         set_target_properties(${ARGV} PROPERTIES INTERFACE_COMPILE_OPTIONS ${flags})
     endif()
 endfunction()
diff --git a/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_RetinaNet_From_Tensorflow.md b/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_RetinaNet_From_Tensorflow.md
new file mode 100644
index 00000000000000..f02d50499fd857
--- /dev/null
+++ b/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_RetinaNet_From_Tensorflow.md
@@ -0,0 +1,15 @@
+# Converting RetinaNet Model from TensorFlow* to the Intermediate Representation {#openvino_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_RetinaNet_From_Tensorflow}
+
+This tutorial explains how to convert the RetinaNet model to the Intermediate Representation (IR).
+
+The [public RetinaNet model](https://github.com/fizyr/keras-retinanet) does not contain pretrained TensorFlow\* weights.
+To convert this model to the TensorFlow\* format, you can use the [Reproduce Keras* to TensorFlow* Conversion tutorial](https://docs.openvinotoolkit.org/latest/omz_models_model_retinanet_tf.html).
+
+After you convert the model to the TensorFlow* format, run the Model Optimizer command below:
+```sh
+python mo.py --input "input_1[1 1333 1333 3]" --input_model retinanet_resnet50_coco_best_v2.1.0.pb --data_type FP32 --transformations_config ./extensions/front/tf/retinanet.json
+```
+
+The `transformations_config` command-line parameter specifies the configuration JSON file with model conversion hints for the Model Optimizer.
+The JSON file contains parameters that need to be changed if you trained the model yourself, as well as information on how to match endpoints
+to replace the subgraph nodes. After the model is converted to the IR, the output nodes are replaced with the DetectionOutput layer.
diff --git a/docs/doxygen/ie_docs.xml b/docs/doxygen/ie_docs.xml
index 19a87a1e11e97c..99e91e53ed572f 100644
--- a/docs/doxygen/ie_docs.xml
+++ b/docs/doxygen/ie_docs.xml
@@ -34,6 +34,7 @@ limitations under the License.
+
@@ -176,6 +177,7 @@ limitations under the License.
+
@@ -219,6 +221,7 @@ limitations under the License.
+
diff --git a/docs/ops/arithmetic/Acosh_3.md b/docs/ops/arithmetic/Acosh_3.md
index 79fde27fbd3c20..9f858924d4e01e 100644
--- a/docs/ops/arithmetic/Acosh_3.md
+++ b/docs/ops/arithmetic/Acosh_3.md
@@ -6,32 +6,28 @@
 
 **Short description**: *Acosh* performs element-wise hyperbolic inverse cosine (arccosh) operation with given tensor.
 
-**Attributes**:
+**Detailed description**: *Acosh* performs the element-wise hyperbolic inverse cosine operation on a given input tensor, based on the following mathematical formula:
 
-    No attributes available.
+\f[
+a_{i} = acosh(a_{i})
+\f]
+
+**Attributes**: *Acosh* operation has no attributes.
 
 **Inputs**
 
-* **1**: A tensor of type *T*. **Required.**
+* **1**: A tensor of type *T* and arbitrary shape. **Required.**
 
 **Outputs**
 
-* **1**: The result of element-wise acosh operation. A tensor of type *T*.
+* **1**: The result of element-wise *Acosh* operation. A tensor of type *T* and the same shape as the input tensor.
 
 **Types**
 
-* *T*: any floating-point type.
-
-*Acosh* does the following with the input tensor *a*:
-
-\f[
-a_{i} = acosh(a_{i})
-\f]
+* *T*: any numeric type.
 
 **Examples**
 
-*Example 1*
-
 ```xml
diff --git a/docs/ops/arithmetic/Erf_1.md b/docs/ops/arithmetic/Erf_1.md
index 6b445dafad29bb..52d2d0301cb679 100644
--- a/docs/ops/arithmetic/Erf_1.md
+++ b/docs/ops/arithmetic/Erf_1.md
@@ -4,34 +4,32 @@
 
 **Category**: Arithmetic unary operation
 
-**Short description**: *Erf* calculates the Gauss error function element-wise with given tensor.
+**Short description**: *Erf* performs element-wise Gauss error function (erf) on a given input tensor.
 
 **Detailed Description**
 
-For each element from the input tensor calculates corresponding element in the output tensor with the following formula:
+*Erf* performs element-wise erf operation on a given input tensor, based on the following mathematical formula:
+
 \f[
 erf(x) = \pi^{-\frac{1}{2}} \int_{-x}^{x} e^{-t^2} dt
 \f]
 
-**Attributes**:
-
-    No attributes available.
+**Attributes**: *Erf* operation has no attributes.
 
 **Inputs**
 
-* **1**: A tensor of type *T*. **Required.**
+* **1**: A tensor of type *T* and arbitrary shape. **Required.**
 
 **Outputs**
 
-* **1**: The result of element-wise operation. A tensor of type *T*.
+* **1**: The result of element-wise *Erf* function applied to the input tensor. A tensor of type *T* and the same shape as the input tensor.
 
 **Types**
 
-* *T*: any supported floating-point type.
+* *T*: any supported numeric type.
 
-**Examples**
-*Example 1*
+**Example**
 
 ```xml
diff --git a/docs/ops/arithmetic/Sign_1.md b/docs/ops/arithmetic/Sign_1.md
index e68cc51f97f7c7..1aa87097e62136 100644
--- a/docs/ops/arithmetic/Sign_1.md
+++ b/docs/ops/arithmetic/Sign_1.md
@@ -4,33 +4,30 @@
 
 **Category**: Arithmetic unary operation
 
-**Short description**: *Sign* performs element-wise sign operation with given tensor.
+**Short description**: *Sign* performs element-wise sign operation on a given input tensor.
 
-**Attributes**:
+**Detailed description**: *Sign* performs element-wise sign operation on a given input tensor, based on the following mathematical formula:
 
-    No attributes available.
+\f[
+a_{i} = sign(a_{i})
+\f]
+
+**Attributes**: *Sign* operation has no attributes.
 
 **Inputs**
 
-* **1**: An tensor of type *T*. **Required.**
+* **1**: A tensor of type *T* and arbitrary shape. **Required.**
 
 **Outputs**
 
-* **1**: The result of element-wise sign operation. A tensor of type *T* with mapped elements of the input tensor to -1 (if it is negative), 0 (if it is zero), or 1 (if it is positive).
+* **1**: The result of element-wise *Sign* operation. A tensor of type *T* with each element of the input tensor mapped to -1 (if it is negative), 0 (if it is zero), or 1 (if it is positive).
 
 **Types**
 
 * *T*: any numeric type.
 
-*Sign* does the following with the input tensor *a*:
-
-\f[
-a_{i} = sign(a_{i})
-\f]
-
-**Examples**
-*Example 1*
+**Example**
 
 ```xml
diff --git a/docs/ops/condition/If_8.md b/docs/ops/condition/If_8.md
new file mode 100644
index 00000000000000..7de2449b1eada1
--- /dev/null
+++ b/docs/ops/condition/If_8.md
@@ -0,0 +1,226 @@
+## If {#openvino_docs_ops_infrastructure_If_8}
+
+**Versioned name**: *If-8*
+
+**Category**: Infrastructure
+
+**Short description**: *If* operation contains two internal subgraphs (`then_body` and `else_body`)
+and executes one of them depending on the `cond` value. If `cond` is `True`, `then_body` is executed.
If `cond` is `False`, +the operation executes the `else_body` subgraph. + +**Detailed description** + +*If* must not contain empty subgraphs. Each of them must have at least one operation `Result`. +Also the number of outputs from *If* always must be greater than zero and equal to the number of outputs from each subgraph. + +**If attributes**: + +* **Subgraphs**: + + `then_body`/`else_body` are subgraphs that are executed depending on the `cond` value. + The subgraph is described operation by operation as a typical IR network. + The subgraph has inputs (`Parameter` operations) and outputs (`Result` operations). + + * **Subgraph's inputs** - inputs to the subgraph which associated with *If* inputs via *port_map*. + The subgraph can have any number of inputs (even zero). + + * **Subgraph's outputs** - outputs from the subgraph which associated with *If* outputs via *port_map*. + The subgraph must contain at least one output. Each *If* output is associated with one output from the subgraph. + Therefore the number of `then_body` outputs is equal to the number of outputs from *If* and + the number of `else_body` outputs. + The type of the subgraph output and the type of the associated output from *If* must be equal. + + +* **Port maps**: + + *port_map* is a set of rules to map input or output data tensors of *If* operation onto the subgraph data tensors. + The `port_map` entries can be `input` and `output`. Each entry describes a corresponding mapping rule. + *If* has two *port_maps*: `then_port_map` for `then_body` and `else_port_map` for `else_body`. + + * **Port map attributes**: + + * *external_port_id* + * **Description**: *external_port_id* is a port ID of *If* operation. + * **Range of values**: IDs of the *If* inputs and outputs + * **Type**: `unsigned int` + * **Default value**: None + * **Required**: *yes* + + * *internal_layer_id* + + * **Description**: *internal_layer_id* is a `Parameter` or `Result` operation ID inside + the subgraph to map to. + * **Range of values**: IDs of the `Parameter` or `Result` operations in the subgraph + * **Type**: `unsigned int` + * **Default value**: None + * **Required**: *yes* + +**If Inputs** + + +* **cond**: A scalar or 1D tensor with 1 element of `boolean` type specifying which subgraph to execute. +`True` value means to execute the `then_body`, `False` - `else_body`. *Required*. + +* **Multiple other inputs**: Tensors of different types and shapes. *Optional*. + +**If Outputs** + +* **Multiple outputs**: Results of execution of one of the subgraph. Tensors of any type and shape. + + +**Body Inputs** + +* **Multiple inputs**: Tensors of different types and shapes. *Optional*. + + +**Body Outputs** + +* **Multiple outputs**: Results of execution of the subgraph. Tensors of any type and shape. + + +**Examples** + +*Example 1: a typical If structure* +```xml + + + + + 2 + 4 + + + 2 + 4 + + + 2 + 4 + + + + + 2 + 4 + + + + + + + + + + + + + + + + + + + 2 + 4 + + + + + + + + 2 + 4 + + + + + + + + 2 + 4 + + + 2 + 4 + + + + + 2 + 4 + + + + + + + 2 + 4 + + + + + + + + + + + + + + + + + 2 + 4 + + + + + + + + 2 + 4 + + + + + + + + 2 + 4 + + + 2 + 4 + + + + + 2 + 4 + + + + + + + 2 + 4 + + + + + + + + + + + +``` diff --git a/docs/ops/condition/Select_1.md b/docs/ops/condition/Select_1.md index 8f51624961078e..56e5fde8eab790 100644 --- a/docs/ops/condition/Select_1.md +++ b/docs/ops/condition/Select_1.md @@ -17,26 +17,31 @@ * **Description**: specifies rules used for auto-broadcasting of input tensors. 
 * **Range of values**:
-    * *none* - no auto-broadcasting is allowed, all input shapes should match
-    * *numpy* - numpy broadcasting rules, aligned with ONNX Broadcasting. Description is available in ONNX docs.
-  * **Type**: string
+    * *none* - no auto-broadcasting is allowed, all input shapes must match
+    * *numpy* - numpy broadcasting rules, description is available in [Broadcast Rules For Elementwise Operations](../broadcast_rules.md)
+    * *pdpd* - PaddlePaddle-style implicit broadcasting, description is available in [Broadcast Rules For Elementwise Operations](../broadcast_rules.md)
+  * **Type**: `string`
   * **Default value**: "numpy"
   * **Required**: *no*
 
 **Inputs**:
 
-* **1**: `cond` tensor with selection mask of type `boolean`. The tensor can be 0D.
+* **1**: `cond` - tensor of type *T_COND* and arbitrary shape with the selection mask. **Required**.
 
-* **2**: `then` the tensor with elements to take where the corresponding element in `cond` is true. Arbitrary type that should match type of `else` input tensor.
+* **2**: `then` - tensor of type *T* and arbitrary shape with elements to take where the corresponding element in `cond` is `true`. **Required**.
 
-* **3**: `else` the tensor with elements to take where the corresponding element in `cond` is false. Arbitrary type that should match type of `then` input tensor.
+* **3**: `else` - tensor of type *T* and arbitrary shape with elements to take where the corresponding element in `cond` is `false`. **Required**.
 
 **Outputs**:
 
 * **1**: blended output tensor that is tailored from values of inputs tensors `then` and `else` based on `cond` and broadcasting rules. It has the same type of elements as `then` and `else`.
 
+**Types**
+
+* *T_COND*: `boolean` type.
+* *T*: any supported numeric type.
 
 **Example**
diff --git a/docs/ops/generation/RandomUniform_8.md b/docs/ops/generation/RandomUniform_8.md
new file mode 100644
index 00000000000000..4269c82bc6a8aa
--- /dev/null
+++ b/docs/ops/generation/RandomUniform_8.md
@@ -0,0 +1,231 @@
+## RandomUniform {#openvino_docs_ops_generation_RandomUniform_8}
+
+**Versioned name**: *RandomUniform-8*
+
+**Category**: Generation
+
+**Short description**: *RandomUniform* operation generates a sequence of random values from a uniform distribution.
+
+**Detailed description**:
+
+*RandomUniform* operation generates random numbers from a uniform distribution in the range `[minval, maxval)`.
+The generation algorithm is based on an underlying random integer generator that uses the Philox algorithm. Philox
+is a counter-based pseudo-random generator that produces uint32 values. A single invocation of the Philox algorithm
+returns four random values, depending on the given *key* and *counter* values. *Key* and *counter* are initialized
+with the *seed* and *seed2* attributes respectively:
+
+\f[
+key = seed\\
+counter = seed2
+\f]
+
+Link to the original paper: [Parallel Random Numbers: As Easy as 1, 2, 3](https://www.thesalmons.org/john/random123/papers/random123sc11.pdf).
+
+The result of Philox is calculated by applying a fixed number of *key* and *counter* updates, the so-called "rounds".
+This implementation uses the 4x32_10 version of the Philox algorithm, where the number of rounds is 10.
+
+Suppose *n* determines the *n*-th group of four elements of the random sequence.
+In each round, *key*, *counter* and *n* are split into pairs of uint32 values:
+
+\f[
+R = cast\_to\_uint32(value)\\
+L = cast\_to\_uint32(value >> 32),
+\f]
+where *cast\_to\_uint32* is a static cast to uint32, *value* is a uint64 input value, *L*, *R* are the resulting uint32
+values, and >> is a bitwise right shift.
+
+Then *n* and *counter* are updated with the following formulas:
+
+\f[
+L'= mullo(R, M)\\
+R' = mulhi(R, M) \oplus k \oplus L \\
+mulhi(a, b) = \lfloor (a \times b) / 2^{32} \rfloor \\
+mullo(a, b) = (a \times b) \mod 2^{32}
+\f]
+where \f$\oplus\f$ is bitwise xor, *k* = \f$R_{key}\f$ for updating *counter* and *k* = \f$L_{key}\f$ for updating *n*,
+*M* = `0xD2511F53` for updating *n* and *M* = `0xCD9E8D57` for updating *counter*.
+
+After each round, *key* is updated by adding another pair of constants:
+\f[
+L += 0x9E3779B9 \\
+R += 0xBB67AE85
+\f]
+The values \f$L'_{n}, R'_{n}, L'_{counter}, R'_{counter}\f$ are the four resulting random numbers.
+
+Float values in [0..1) are obtained from the 32-bit integers by the following rules.
+
+Float16 is formatted as follows: *sign* (1 bit), *exponent* (5 bits), *mantissa* (10 bits). The value is interpreted
+using the following formula:
+\f[
+(-1)^{sign} \times 1.mantissa \times 2^{exponent - 15}
+\f]
+
+To obtain float16 values, *sign*, *exponent* and *mantissa* are set as follows:
+```
+sign = 0
+exponent = 15 - representation of a zero exponent.
+mantissa = 10 right bits from generated uint32 random value.
+```
+
+So the resulting float16 value is:
+```
+x_uint16 = x // Truncate the upper 16 bits.
+val = ((exponent << 10) | x_uint16 & 0x3ffu) - 1.0,
+```
+where x is the uint32 generated random value.
+
+Float32 is formatted as follows: *sign* (1 bit), *exponent* (8 bits), *mantissa* (23 bits). The value is interpreted
+using the following formula:
+\f[
+(-1)^{sign} \times 1.mantissa \times 2^{exponent - 127}
+\f]
+
+To obtain float values, *sign*, *exponent* and *mantissa* are set as follows:
+```
+sign = 0
+exponent = 127 - representation of a zero exponent.
+mantissa = 23 right bits from generated uint32 random value.
+```
+
+So the resulting float value is:
+```
+val = ((exponent << 23) | x & 0x7fffffu) - 1.0,
+```
+where x is the uint32 generated random value.
+
+Double is formatted as follows: *sign* (1 bit), *exponent* (11 bits), *mantissa* (52 bits). The value is interpreted
+using the following formula:
+\f[
+(-1)^{sign} \times 1.mantissa \times 2^{exponent - 1023}
+\f]
+
+To obtain double values, *sign*, *exponent* and *mantissa* are set as follows:
+```
+sign = 0
+exponent = 1023 - representation of a zero exponent.
+mantissa = 52 right bits from two concatenated uint32 values from the random integer generator.
+```
+
+So the resulting double is obtained as follows:
+```
+mantissa_h = x0 & 0xfffffu;  // upper 20 bits of mantissa
+mantissa_l = x1;             // lower 32 bits of mantissa
+mantissa = (mantissa_h << 32) | mantissa_l;
+val = ((exponent << 52) | mantissa) - 1.0,
+```
+where x0, x1 are uint32 generated random values.
+
+To obtain a value in the specified range, each value is processed with the following formulas.
+
+For float values:
+\f[
+result = x \times (maxval - minval) + minval,
+\f]
+where *x* is a random float or double value in [0..1).
+
+For integer values:
+\f[
+result = x \mod (maxval - minval) + minval,
+\f]
+where *x* is a uint32 random value.
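+
+The following is a compact, self-contained C++ sketch of the pieces described above: one Philox-style round and the uint32-to-float32 conversion with range mapping. It only illustrates the formulas in this section and is not the OpenVINO implementation; the function names and the sample inputs are made up.
+
+```cpp
+#include <cstdint>
+#include <cstring>
+#include <iostream>
+
+// One Philox-style round on an (R, L) pair of uint32 halves:
+//   L' = mullo(R, M),  R' = mulhi(R, M) ^ k ^ L
+void philox_round(uint32_t& R, uint32_t& L, uint32_t M, uint32_t k) {
+    const uint64_t prod = static_cast<uint64_t>(R) * M;               // 32x32 -> 64-bit product
+    const uint32_t new_L = static_cast<uint32_t>(prod);               // mullo(R, M)
+    const uint32_t new_R = static_cast<uint32_t>(prod >> 32) ^ k ^ L; // mulhi(R, M) ^ k ^ L
+    R = new_R;
+    L = new_L;
+}
+
+// uint32 -> float in [0, 1): keep 23 random mantissa bits, force the
+// exponent field to 127 so the bit pattern encodes 1.mantissa in [1, 2),
+// then subtract 1.0, following the float32 rules above.
+float to_unit_float(uint32_t x) {
+    const uint32_t bits = (127u << 23) | (x & 0x7fffffu);
+    float f;
+    std::memcpy(&f, &bits, sizeof f);
+    return f - 1.0f;
+}
+
+int main() {
+    uint32_t R = 10u, L = 0u;                     // e.g. counter halves derived from seed2 = 10
+    philox_round(R, L, 0xCD9E8D57u, /*k=*/150u);  // M and k constants as in the text above
+    const float minval = 2.f, maxval = 10.f;
+    std::cout << to_unit_float(R) * (maxval - minval) + minval << "\n";  // value in [2, 10)
+    return 0;
+}
+```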
+
+Example 1. *RandomUniform* output with `seed` = 150, `seed2` = 10, `output_type` = f32:
+
+```
+input_shape = [ 3, 3 ]
+output = [[0.7011236  0.30539632 0.93931055]
+          [0.9456035  0.11694777 0.50770056]
+          [0.5197197  0.22727466 0.991374  ]]
+```
+
+Example 2. *RandomUniform* output with `seed` = 80, `seed2` = 100, `output_type` = double:
+
+```
+input_shape = [ 2, 2 ]
+
+minval = 2
+
+maxval = 10
+
+output = [[5.65927959 4.23122376]
+          [2.67008206 2.36423758]]
+```
+
+Example 3. *RandomUniform* output with `seed` = 80, `seed2` = 100, `output_type` = i32:
+
+```
+input_shape = [ 2, 3 ]
+
+minval = 50
+
+maxval = 100
+
+output = [[65 70 56]
+          [59 82 92]]
+```
+
+**Attributes**:
+
+* *output_type*
+
+    * **Description**: the type of the output. Determines the generation algorithm and affects the resulting values.
+      Output numbers generated for different values of *output_type* may not be equal.
+    * **Range of values**: "i32", "i64", "f16", "bf16", "f32", "f64".
+    * **Type**: string
+    * **Required**: *Yes*
+
+* *seed*
+
+    * **Description**: global seed value.
+    * **Range of values**: positive integers
+    * **Type**: `int`
+    * **Required**: *Yes*
+
+* *seed2*
+
+    * **Description**: operational seed value.
+    * **Range of values**: positive integers
+    * **Type**: `int`
+    * **Required**: *Yes*
+
+**Inputs**:
+
+* **1**: `shape` - 1D tensor of type *T_SHAPE* describing the output shape. **Required.**
+
+* **2**: `minval` - scalar or 1D tensor with 1 element of the type specified by the *output_type* attribute,
+  defines the lower bound on the range of random values to generate (inclusive). **Required.**
+
+* **3**: `maxval` - scalar or 1D tensor with 1 element of the type specified by the *output_type* attribute,
+  defines the upper bound on the range of random values to generate (exclusive). **Required.**
+
+
+**Outputs**:
+
+* **1**: A tensor of the type specified by the *output_type* attribute and of the shape defined by the `shape` input tensor.
+
+**Types**
+
+* *T_SHAPE*: `int32` or `int64`.
+
+*Example 1: IR example.*
+
+```xml
+<!-- The XML markup of this example was lost in extraction; the snippet below is a reconstruction that follows the IR examples in the other operation specs and keeps the surviving dimensions. -->
+<layer ... type="RandomUniform">
+    <data output_type="f32" seed="150" seed2="10"/>
+    <input>
+        <port id="0" precision="I32">  <!-- shape value: [2, 3, 10] -->
+            <dim>3</dim>
+        </port>
+        <port id="1" precision="FP32"/>  <!-- minval -->
+        <port id="2" precision="FP32"/>  <!-- maxval -->
+    </input>
+    <output>
+        <port id="3" precision="FP32">
+            <dim>2</dim>
+            <dim>3</dim>
+            <dim>10</dim>
+        </port>
+    </output>
+</layer>
+```
diff --git a/docs/ops/normalization/MVN_1.md b/docs/ops/normalization/MVN_1.md
index ef8a37204dd82f..a82c9a9ca40531 100644
--- a/docs/ops/normalization/MVN_1.md
+++ b/docs/ops/normalization/MVN_1.md
@@ -4,57 +4,89 @@
 
 **Category**: *Normalization*
 
-**Short description**: [Reference](http://caffe.berkeleyvision.org/tutorial/layers/mvn.html)
+**Short description**: Calculates mean-variance normalization of the input tensor. Supports two normalization techniques: [Instance/Contrast Normalization](https://arxiv.org/abs/1607.08022) and [Layer Normalization](https://arxiv.org/abs/1607.06450).
 
 **Detailed description**
 
-*MVN* subtracts mean value from the input blob:
+Depending on the `across_channels` attribute, the mean value is calculated using one of the formulas below:
+
+1. If `true`, the mean value is calculated using Layer Normalization:
+\f[
+\mu_{n} = \frac{\sum_{c}^{C}\sum_{h}^{H}\sum_{w}^{W} i_{nchw}}{C * H * W}
+\f]
+2. If `false`, the mean value is calculated using Instance/Contrast Normalization:
 \f[
-o_{i} = i_{i} - \frac{\sum{i_{k}}}{C * H * W}
+\mu_{nc} = \frac{\sum_{h}^{H}\sum_{w}^{W} i_{nchw}}{H * W}
 \f]
-If *normalize_variance* is set to 1, the output blob is divided by variance:
+
+where \f$i_{nchw}\f$ is an input tensor parametrized by \f$n\f$ batches, \f$c\f$ channels and \f$h,w\f$ spatial dimensions.
+
+If the `reduction_axes` attribute is provided, the mean value is calculated based on the formula:
 \f[
-o_{i}=\frac{o_{i}}{\sum \sqrt {o_{k}^2}+\epsilon}
+\mu_{n} = ReduceMean(i_{k}, reduction_axes)
+\f]
+
+Afterwards, *MVN* subtracts the mean value from the input blob.
+
+If *normalize_variance* is set to `true`, the output blob is divided by the variance:
+\f[
+o_{i}=\frac{o_{i}}{\sqrt {\sum {\sigma_{k}^2}+\epsilon}}
+\f]
+
+where \f$\sigma_{k}^2\f$ is the variance calculated based on the mean value, and \f$\epsilon\f$ is a value added to the variance for numerical stability, corresponding to the `epsilon` attribute.
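+
+As a minimal illustration of the formulas above, the C++ sketch below applies MVN to an NCHW buffer along the Instance/Contrast Normalization path (`across_channels` = `false`). The function and its layout handling are our own assumptions for the sketch, not the OpenVINO implementation.
+
+```cpp
+#include <cmath>
+#include <cstddef>
+#include <vector>
+
+// Subtract the mean over H*W for every (n, c) slice and, if requested,
+// divide by sqrt(variance + eps), with eps added inside the square root
+// as in the formula above.
+void mvn_nchw(std::vector<float>& data, std::size_t N, std::size_t C,
+              std::size_t H, std::size_t W, bool normalize_variance, float eps) {
+    const std::size_t hw = H * W;
+    for (std::size_t n = 0; n < N; ++n) {
+        for (std::size_t c = 0; c < C; ++c) {
+            float* slice = data.data() + (n * C + c) * hw;
+            float mean = 0.f;
+            for (std::size_t i = 0; i < hw; ++i) mean += slice[i];
+            mean /= hw;
+            float var = 0.f;
+            for (std::size_t i = 0; i < hw; ++i) {
+                slice[i] -= mean;               // mean subtraction
+                var += slice[i] * slice[i];     // accumulate squared deviations
+            }
+            if (normalize_variance) {
+                const float denom = std::sqrt(var / hw + eps);
+                for (std::size_t i = 0; i < hw; ++i) slice[i] /= denom;
+            }
+        }
+    }
+}
+```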
+
+**Attributes**
+
+* *across_channels*
+
-    * **Description**: *across_channels* is a flag that specifies whether mean values are shared across channels. For example, *across_channels* equal to `false` means that mean values are not shared across channels.
+    * **Description**: *across_channels* is a flag that specifies whether mean values are shared across channels. If `true`, the mean and variance are calculated for each sample over all channels and spatial dimensions (Layer Normalization); otherwise, the calculation is done for each sample and each channel over the spatial dimensions (Instance/Contrast Normalization).
     * **Range of values**:
        * `false` - do not share mean values across channels
        * `true` - share mean values across channels
    * **Type**: `boolean`
-    * **Default value**: `false`
-    * **Required**: *no*
+    * **Required**: *yes*
+
+* *reduction_axes*
+
+    * **Description**: 1D tensor of unique elements and type *T_IND* which specifies indices of dimensions in `data` that define normalization slices. A negative value means counting dimensions from the back.
+    * **Range of values**: the allowed range of axes is `[-r; r-1]` where `r = rank(data)`; the axes can be unsorted
+    * **Type**: `int`
+    * **Required**: *yes*
 
 * *normalize_variance*
 
    * **Description**: *normalize_variance* is a flag that specifies whether to perform variance normalization.
    * **Range of values**:
-        * `false` -- do not normalize variance
-        * `true` -- normalize variance
+        * `false` - do not normalize variance
+        * `true` - normalize variance
    * **Type**: `boolean`
-    * **Default value**: `false`
-    * **Required**: *no*
+    * **Required**: *yes*
 
 * *eps*
 
    * **Description**: *eps* is the number to be added to the variance to avoid division by zero when normalizing the value. For example, *epsilon* equal to 0.001 means that 0.001 is added to the variance.
    * **Range of values**: a positive floating-point number
-    * **Type**: `float`
+    * **Type**: `double`
    * **Required**: *yes*
 
+* **Note**: use only one of the `across_channels` or `reduction_axes` attributes; they cannot be defined together.
+
 **Inputs**
 
-* **1**: 4D or 5D input tensor of any floating-point type. **Required.**
+* **1**: `data` - input tensor of type *T* and arbitrary shape. **Required.**
 
 **Outputs**
 
-* **1**: normalized tensor of the same type and shape as input tensor.
+* **1**: normalized tensor of type *T* and the same shape as the input tensor.
 
-**Example**
+**Types**
+
+* *T*: any floating point type.
+* *T_IND*: `int64` or `int32`.
+
+**Examples**
+
+*Example: with `across_channels` attribute*
 
 ```xml
@@ -77,3 +109,27 @@ o_{i}=\frac{o_{i}}{\sum \sqrt {o_{k}^2}+\epsilon}
 
 ```
+
+*Example: with `reduction_axes` attribute*
+
+```xml
+<!-- The XML markup of this example was lost in extraction; the snippet below is a reconstruction that follows the spec's conventions and keeps the surviving dimensions. -->
+<layer ... type="MVN">
+    <data reduction_axes="2,3" normalize_variance="true" eps="1e-9"/>
+    <input>
+        <port id="0">
+            <dim>6</dim>
+            <dim>12</dim>
+            <dim>10</dim>
+            <dim>24</dim>
+        </port>
+    </input>
+    <output>
+        <port id="1">
+            <dim>6</dim>
+            <dim>12</dim>
+            <dim>10</dim>
+            <dim>24</dim>
+        </port>
+    </output>
+</layer>
+```
diff --git a/docs/ops/normalization/MVN_6.md b/docs/ops/normalization/MVN_6.md
index 9de691458c462d..f89cf60e92df7e 100644
--- a/docs/ops/normalization/MVN_6.md
+++ b/docs/ops/normalization/MVN_6.md
@@ -30,8 +30,8 @@ o_{i}=\frac{o_{i}}{\sqrt {\sum {o_{k}^2}}+\epsilon}
 
 * **Description**: *normalize_variance* is a flag that specifies whether to perform variance normalization.
* **Range of values**: - * `false` -- Do not normalize variance - * `true` -- Normalize variance + * `false` - do not normalize variance + * `true` - normalize variance * **Type**: `boolean` * **Required**: *yes* @@ -46,14 +46,14 @@ o_{i}=\frac{o_{i}}{\sqrt {\sum {o_{k}^2}}+\epsilon} * **Description**: Choose where to add epsilon. * **Range of values**: - * `inside_sqrt` -- Add epsilon inside sqrt - * `outside_sqrt` -- Add epsilon outside of sqrt + * `inside_sqrt` - add epsilon inside sqrt + * `outside_sqrt` - add epsilon outside of sqrt * **Type**: `string` * **Required**: *yes* **Inputs** -* **1**: `data` - Input tensor to be normalized. Type *T*. **Required.** +* **1**: `data` - Input tensor to be normalized of type *T* and arbitrary shape. **Required.** * **2**: `axes` - 1D tensor which specifies indices of dimensions in `data` that define normalization slices. Allowed range of axes is `[-r; r-1]` where `r = rank(data)`, the order can be not sorted. Negative value means counting dimensions from the back. Type *T_IND*. **Required.** @@ -63,8 +63,7 @@ o_{i}=\frac{o_{i}}{\sqrt {\sum {o_{k}^2}}+\epsilon} **Types** -* *T*: any floating-point type. - +* *T*: any floating point type. * *T_IND*: `int64` or `int32`. **Example** diff --git a/docs/ops/opset8.md b/docs/ops/opset8.md index 02e97eab4e42f6..4c71a0bb2fa7fc 100644 --- a/docs/ops/opset8.md +++ b/docs/ops/opset8.md @@ -79,6 +79,7 @@ declared in `namespace opset8`. * [HSigmoid](activation/HSigmoid_5.md) * [HSwish](activation/HSwish_4.md) * [IDFT](signals/IDFT_7.md) +* [If](condition/If_8.md) * [Interpolate](image/Interpolate_4.md) * [Less](comparison/Less_1.md) * [LessEqual](comparison/LessEqual_1.md) @@ -114,6 +115,7 @@ declared in `namespace opset8`. * [PriorBox](detection/PriorBox_1.md) * [Proposal](detection/Proposal_4.md) * [PSROIPooling](detection/PSROIPooling_1.md) +* [RandomUniform](generation/RandomUniform_8.md) * [Range](generation/Range_4.md) * [ReLU](activation/ReLU_1.md) * [ReadValue](infrastructure/ReadValue_3.md) diff --git a/docs/template_extension/cpu_kernel.cpp b/docs/template_extension/cpu_kernel.cpp index aa2486589cbff2..b1d426b15825ce 100644 --- a/docs/template_extension/cpu_kernel.cpp +++ b/docs/template_extension/cpu_kernel.cpp @@ -102,6 +102,7 @@ InferenceEngine::StatusCode OpImplementation::init(InferenceEngine::LayerConfig& IE_THROW() << "Operation supports only FP32 precisions!"; } } catch (InferenceEngine::Exception& ex) { + error = ex.what(); if (resp) { strncpy(resp->msg, error.c_str(), sizeof(resp->msg) - 1); resp->msg[sizeof(resp->msg) - 1] = 0; diff --git a/docs/template_extension/fft_kernel.cpp b/docs/template_extension/fft_kernel.cpp index 12554a70c75406..3fcf71a8f641b1 100644 --- a/docs/template_extension/fft_kernel.cpp +++ b/docs/template_extension/fft_kernel.cpp @@ -66,6 +66,7 @@ InferenceEngine::StatusCode FFTImpl::init(InferenceEngine::LayerConfig& config, IE_THROW() << "Operation supports only FP32 precisions!"; } } catch (InferenceEngine::Exception& ex) { + error = ex.what(); if (resp) { strncpy(resp->msg, error.c_str(), sizeof(resp->msg) - 1); resp->msg[sizeof(resp->msg) - 1] = 0; diff --git a/docs/template_plugin/tests/functional/op_reference/acosh.cpp b/docs/template_plugin/tests/functional/op_reference/acosh.cpp new file mode 100644 index 00000000000000..e854c98b7e0f7a --- /dev/null +++ b/docs/template_plugin/tests/functional/op_reference/acosh.cpp @@ -0,0 +1,81 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include 
#include
+#include
+#include
+
+#include "base_reference_test.hpp"
+
+using namespace ngraph;
+
+namespace reference_tests {
+namespace {
+
+struct AcoshParams {
+    Tensor input;
+    Tensor expected;
+};
+
+// NOTE: the template arguments in this file were lost in extraction;
+// the <...> arguments below are reconstructed from context.
+struct Builder : ParamsBuilder<AcoshParams> {
+    REFERENCE_TESTS_ADD_SET_PARAM(Builder, input);
+    REFERENCE_TESTS_ADD_SET_PARAM(Builder, expected);
+};
+
+class ReferenceAcoshLayerTest : public testing::TestWithParam<AcoshParams>, public CommonReferenceTest {
+public:
+    void SetUp() override {
+        auto params = GetParam();
+        function = CreateFunction(params.input.shape, params.input.type);
+        inputData = {params.input.data};
+        refOutData = {params.expected.data};
+    }
+    static std::string getTestCaseName(const testing::TestParamInfo<AcoshParams>& obj) {
+        auto param = obj.param;
+        std::ostringstream result;
+        result << "shape=" << param.input.shape << "_";
+        result << "type=" << param.input.type;
+        return result.str();
+    }
+
+private:
+    static std::shared_ptr<Function> CreateFunction(const Shape& shape, const element::Type& type) {
+        const auto in = std::make_shared<op::Parameter>(type, shape);
+        const auto acosh = std::make_shared<op::Acosh>(in);
+        return std::make_shared<Function>(NodeVector {acosh}, ParameterVector {in});
+    }
+};
+
+TEST_P(ReferenceAcoshLayerTest, AcoshWithHardcodedRefs) {
+    Exec();
+}
+
+}  // namespace
+
+INSTANTIATE_TEST_SUITE_P(
+    smoke_Acosh_With_Hardcoded_Refs, ReferenceAcoshLayerTest,
+    ::testing::Values(Builder {}
+                          .input({{8}, element::f16, std::vector<float16> {1.f, 2.f, 3.f, 4.f, 5.f, 10.f, 100.f, 1000.f}})
+                          .expected({{8}, element::f16, std::vector<float16> {0., 1.317, 1.763, 2.063, 2.292, 2.993, 5.298, 7.6012}}),
+                      Builder {}
+                          .input({{8}, element::f32, std::vector<float> {1.f, 2.f, 3.f, 4.f, 5.f, 10.f, 100.f, 1000.f}})
+                          .expected({{8}, element::f32, std::vector<float> {0., 1.317, 1.763, 2.063, 2.292, 2.993, 5.298, 7.6012}}),
+                      Builder {}
+                          .input({{8}, element::i32, std::vector<int32_t> {1, 2, 3, 4, 5, 10, 100, 1000}})
+                          .expected({{8}, element::i32, std::vector<int32_t> {0, 1, 2, 2, 2, 3, 5, 8}}),
+                      Builder {}
+                          .input({{8}, element::i64, std::vector<int64_t> {1, 2, 3, 4, 5, 10, 100, 1000}})
+                          .expected({{8}, element::i64, std::vector<int64_t> {0, 1, 2, 2, 2, 3, 5, 8}}),
+                      Builder {}
+                          .input({{8}, element::u32, std::vector<uint32_t> {1, 2, 3, 4, 5, 10, 100, 1000}})
+                          .expected({{8}, element::u32, std::vector<uint32_t> {0, 1, 2, 2, 2, 3, 5, 8}}),
+                      Builder {}
+                          .input({{8}, element::u64, std::vector<uint64_t> {1, 2, 3, 4, 5, 10, 100, 1000}})
+                          .expected({{8}, element::u64, std::vector<uint64_t> {0, 1, 2, 2, 2, 3, 5, 8}})),
+    ReferenceAcoshLayerTest::getTestCaseName);
+}  // namespace reference_tests
diff --git a/docs/template_plugin/tests/functional/op_reference/base_reference_test.cpp b/docs/template_plugin/tests/functional/op_reference/base_reference_test.cpp
index 51af4d2ea1a221..f2d2cf68aa39a2 100644
--- a/docs/template_plugin/tests/functional/op_reference/base_reference_test.cpp
+++ b/docs/template_plugin/tests/functional/op_reference/base_reference_test.cpp
@@ -9,6 +9,8 @@
 
 using namespace InferenceEngine;
 
+namespace reference_tests {
+
 CommonReferenceTest::CommonReferenceTest(): targetDevice("TEMPLATE") {
     core = PluginCache::get().ie(targetDevice);
 }
@@ -171,3 +173,5 @@ void CommonReferenceTest::ValidateBlobs(const InferenceEngine::Blob::Ptr& refBlo
         FAIL() << "Comparator for " << precision << " precision isn't supported";
     }
 }
+
+}  // namespace reference_tests
diff --git a/docs/template_plugin/tests/functional/op_reference/base_reference_test.hpp b/docs/template_plugin/tests/functional/op_reference/base_reference_test.hpp
index 6e3fd942a9e722..de08533405e566 100644
---
a/docs/template_plugin/tests/functional/op_reference/base_reference_test.hpp
+++ b/docs/template_plugin/tests/functional/op_reference/base_reference_test.hpp
@@ -5,8 +5,12 @@
 #include
 #include
 #include
+#include
+#include
 #include
+
+namespace reference_tests {
+
 class CommonReferenceTest {
 public:
     CommonReferenceTest();
@@ -51,3 +55,55 @@ InferenceEngine::Blob::Ptr CreateBlob(const ngraph::element::Type& element_type,
     return blob;
 }
+
+///
+/// Class which helps build data for a single input
+///
+struct Tensor {
+    Tensor() = default;
+
+    Tensor(const ngraph::Shape& shape, ngraph::element::Type type, const InferenceEngine::Blob::Ptr& data): shape {shape}, type {type}, data {data} {}
+
+    // NOTE: the template arguments in this header were lost in extraction
+    // and are reconstructed from context.
+    template <typename T>
+    Tensor(const ngraph::Shape& shape, ngraph::element::Type type, const std::vector<T>& data_elements)
+        : Tensor {shape, type, CreateBlob(type, data_elements)} {}
+
+    ngraph::Shape shape;
+    ngraph::element::Type type;
+    InferenceEngine::Blob::Ptr data;
+};
+
+///
+/// Class which helps build test parameters.
+///
+/// e.g.:
+/// struct Params {
+///     Tensor i, o;
+///     int mul;
+/// };
+/// struct TestParamsBuilder : ParamsBuilder<Params> {
+///     REFERENCE_TESTS_ADD_SET_PARAM(TestParamsBuilder, i);
+///     REFERENCE_TESTS_ADD_SET_PARAM(TestParamsBuilder, o);
+///     REFERENCE_TESTS_ADD_SET_PARAM(TestParamsBuilder, mul);
+/// };
+///
+/// const Params p = TestParamsBuilder{}
+///     .i(Tensor{{0}, i32, {1}})
+///     .o(Tensor{{0}, i32, {1}})
+///     .mul(10);
+template <typename Params>
+class ParamsBuilder {
+protected:
+    Params params;
+
+public:
+    operator Params() const {
+        return params;
+    }
+};
+#define REFERENCE_TESTS_ADD_SET_PARAM(builder_type, param_to_set)  \
+    builder_type& param_to_set(decltype(params.param_to_set) t) {  \
+        params.param_to_set = std::move(t);                        \
+        return *this;                                              \
+    }
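+// Illustration (ours, not part of the original header): for the builder
+// field `i` above, REFERENCE_TESTS_ADD_SET_PARAM(TestParamsBuilder, i)
+// expands to a chainable setter roughly equivalent to:
+//
+//     TestParamsBuilder& i(decltype(params.i) t) {
+//         params.i = std::move(t);
+//         return *this;
+//     }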
+
+}  // namespace reference_tests
diff --git a/docs/template_plugin/tests/functional/op_reference/convert.cpp b/docs/template_plugin/tests/functional/op_reference/convert.cpp
index fb32fda4cbbfd8..b8e6f5846f7408 100644
--- a/docs/template_plugin/tests/functional/op_reference/convert.cpp
+++ b/docs/template_plugin/tests/functional/op_reference/convert.cpp
@@ -12,6 +12,7 @@
 
 #include "base_reference_test.hpp"
 
+using namespace reference_tests;
 using namespace ngraph;
 using namespace InferenceEngine;
 
diff --git a/docs/template_plugin/tests/functional/op_reference/erf.cpp b/docs/template_plugin/tests/functional/op_reference/erf.cpp
new file mode 100644
index 00000000000000..bd888a8e03c90f
--- /dev/null
+++ b/docs/template_plugin/tests/functional/op_reference/erf.cpp
@@ -0,0 +1,94 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "base_reference_test.hpp"
+
+using namespace reference_tests;
+using namespace ngraph;
+using namespace InferenceEngine;
+
+// NOTE: the template arguments in this file were lost in extraction;
+// the <...> arguments below are reconstructed from context.
+struct ErfParams {
+    template <typename T>
+    ErfParams(const ngraph::PartialShape& shape, const ngraph::element::Type& iType, const std::vector<T>& iValues)
+        : pshape(shape), inType(iType), outType(iType), inputData(CreateBlob(iType, iValues)) {
+        std::vector<T> oValues;
+        std::vector<double> output;
+        for (auto element : iValues)
+            output.push_back(static_cast<double>(element));
+
+        std::transform(output.begin(), output.end(), output.begin(), [](double input) -> double {
+            return std::erf(input);
+        });
+
+        if (std::is_integral<T>()) {
+            std::transform(output.begin(), output.end(), output.begin(), [](double input) -> double {
+                return std::round(input);
+            });
+        }
+
+        for (auto element : output)
+            oValues.push_back(static_cast<T>(element));
+        refData = CreateBlob(outType, oValues);
+    }
+    ngraph::PartialShape pshape;
+    ngraph::element::Type inType;
+    ngraph::element::Type outType;
+    InferenceEngine::Blob::Ptr inputData;
+    InferenceEngine::Blob::Ptr refData;
+};
+
+class ReferenceErfLayerTest : public testing::TestWithParam<ErfParams>, public CommonReferenceTest {
+public:
+    void SetUp() override {
+        auto params = GetParam();
+        function = CreateFunction(params.pshape, params.inType, params.outType);
+        inputData = {params.inputData};
+        refOutData = {params.refData};
+    }
+    static std::string getTestCaseName(const testing::TestParamInfo<ErfParams>& obj) {
+        auto param = obj.param;
+        std::ostringstream result;
+        result << "shape=" << param.pshape << "_";
+        result << "iType=" << param.inType << "_";
+        result << "oType=" << param.outType;
+        return result.str();
+    }
+
+private:
+    static std::shared_ptr<Function> CreateFunction(const PartialShape& input_shape, const element::Type& input_type,
+                                                    const element::Type& expected_output_type) {
+        const auto in = std::make_shared<op::Parameter>(input_type, input_shape);
+        const auto erf = std::make_shared<op::Erf>(in);
+        return std::make_shared<Function>(NodeVector {erf}, ParameterVector {in});
+    }
+};
+
+TEST_P(ReferenceErfLayerTest, CompareWithRefs) {
+    Exec();
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    smoke_Erf_With_Hardcoded_Refs, ReferenceErfLayerTest,
+    ::testing::Values(ErfParams(ngraph::PartialShape {2, 5}, ngraph::element::f32,
+                                std::vector<float> {-INFINITY, -4.0f, -3.0f, -2.0f, -1.0f, 0.0f, 1.0f, 2.0f, 3.0f, INFINITY}),
+                      ErfParams(ngraph::PartialShape {2, 5}, ngraph::element::f16,
+                                std::vector<float16> {-INFINITY, -4.0f, -3.0f, -2.0f, -1.0f, 0.0f, 1.0f, 2.0f, 3.0f, INFINITY}),
+                      ErfParams(ngraph::PartialShape {2, 3}, ngraph::element::i32,
+                                std::vector<int32_t> {std::numeric_limits<int32_t>::min(), -2, -1, 1, 2, std::numeric_limits<int32_t>::max()}),
+                      ErfParams(ngraph::PartialShape {2, 3}, ngraph::element::u32,
+                                std::vector<uint32_t> {std::numeric_limits<uint32_t>::min(), 0, 1, 2, 3, std::numeric_limits<uint32_t>::max()}),
+                      ErfParams(ngraph::PartialShape {2, 3}, ngraph::element::i64,
+                                std::vector<int64_t> {std::numeric_limits<int64_t>::min(), -2, -1, 1, 2, std::numeric_limits<int64_t>::max()}),
+                      ErfParams(ngraph::PartialShape {2, 3}, ngraph::element::u64,
+                                std::vector<uint64_t> {std::numeric_limits<uint64_t>::min(), 0, 1, 2, 3, std::numeric_limits<uint64_t>::max()})),
+    ReferenceErfLayerTest::getTestCaseName);
diff --git a/docs/template_plugin/tests/functional/op_reference/grn.cpp b/docs/template_plugin/tests/functional/op_reference/grn.cpp
index 4d003b9b9a2fef..e7fc0c79f6b82b 100644
--- a/docs/template_plugin/tests/functional/op_reference/grn.cpp
+++ b/docs/template_plugin/tests/functional/op_reference/grn.cpp
@@ -12,21 +12,22 @@
 
 #include "base_reference_test.hpp"
 
+using namespace reference_tests;
 using namespace ngraph;
 using namespace InferenceEngine;
 
 namespace {
 struct GrnParams {
     template <typename T>
-    GrnParams(const float bias, const ngraph::PartialShape& shape, const ngraph::element::Type& iType, const std::vector<T>& iValues,
+    GrnParams(const float bias, const PartialShape& shape, const element::Type& iType, const std::vector<T>& iValues,
               const std::vector<T>& oValues)
         : bias(bias), pshape(shape), inType(iType), outType(iType), inputData(CreateBlob(iType, iValues)), refData(CreateBlob(iType, oValues)) {}
     float bias;
-    ngraph::PartialShape pshape;
-    ngraph::element::Type inType;
-    ngraph::element::Type outType;
-    InferenceEngine::Blob::Ptr inputData;
-    InferenceEngine::Blob::Ptr refData;
+    PartialShape pshape;
+    element::Type inType;
+    element::Type outType;
+    Blob::Ptr inputData;
+    Blob::Ptr refData;
 };
 
 class ReferenceGrnLayerTest : public
testing::TestWithParam, public CommonReferenceTest { @@ -60,21 +61,21 @@ TEST_P(ReferenceGrnLayerTest, CompareWithHardcodedRefs) { } template -std::vector generateGrnParams(const ngraph::element::Type& type) { +std::vector generateGrnParams(const element::Type& type) { using T = typename element_type_traits::value_type; std::vector grnParams { // bias 1e-6 // 2D // 3D // 4D - GrnParams(1e-6, ngraph::PartialShape {3, 4}, type, std::vector {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, + GrnParams(1e-6, PartialShape {3, 4}, type, std::vector {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, std::vector {0.182574, 0.365148, 0.547723, 0.730297, 0.379049, 0.454859, 0.530669, 0.606478, 0.426162, 0.473514, 0.520865, 0.568217}), - GrnParams(1e-6, ngraph::PartialShape {2, 3, 4}, type, + GrnParams(1e-6, PartialShape {2, 3, 4}, type, std::vector {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24}, std::vector {0.0966737, 0.169031, 0.224231, 0.267261, 0.483368, 0.507093, 0.523205, 0.534522, 0.870063, 0.845154, 0.822179, 0.801784, 0.433574, 0.441836, 0.449215, 0.455842, 0.566982, 0.568075, 0.569005, 0.569803, 0.700389, 0.694314, 0.688796, 0.683763}), - GrnParams(1e-6, ngraph::PartialShape {1, 2, 3, 4}, type, + GrnParams(1e-6, PartialShape {1, 2, 3, 4}, type, std::vector {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24}, std::vector {0.0766965, 0.141421, 0.196116, 0.242536, 0.282166, 0.316228, 0.345705, 0.371391, 0.393919, 0.413803, 0.431455, 0.447214, 0.997055, 0.989949, 0.980581, 0.970143, 0.959365, 0.948683, 0.938343, 0.928477, 0.919145, 0.910366, 0.902134, 0.894427}), - GrnParams(1e-6, ngraph::PartialShape {2, 2, 3, 4}, type, + GrnParams(1e-6, PartialShape {2, 2, 3, 4}, type, std::vector {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48}, std::vector {0.0766965, 0.141421, 0.196116, 0.242536, 0.282166, 0.316228, 0.345705, 0.371391, 0.393919, 0.413803, 0.431455, 0.447214, @@ -82,17 +83,17 @@ std::vector generateGrnParams(const ngraph::element::Type& type) { 0.559857, 0.564684, 0.56921, 0.573462, 0.577465, 0.581238, 0.584802, 0.588172, 0.591364, 0.594391, 0.597266, 0.6, 0.828589, 0.825307, 0.822192, 0.819232, 0.816416, 0.813733, 0.811176, 0.808736, 0.806405, 0.804176, 0.802043, 0.8}), // bias 100.25 // 2D // 3D // 4D - GrnParams(100.25, ngraph::PartialShape {3, 4}, type, std::vector {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, + GrnParams(100.25, PartialShape {3, 4}, type, std::vector {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, std::vector {0.0876216, 0.175243, 0.262865, 0.350486, 0.301923, 0.362308, 0.422693, 0.483077, 0.385076, 0.427863, 0.470649, 0.513435}), - GrnParams(100.25, ngraph::PartialShape {2, 3, 4}, type, + GrnParams(100.25, PartialShape {2, 3, 4}, type, std::vector {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24}, std::vector {0.0694629, 0.129032, 0.179525, 0.222137, 0.347314, 0.387097, 0.418891, 0.444273, 0.625166, 0.645161, 0.658258, 0.66641, 0.41125, 0.421303, 0.430287, 0.438356, 0.537789, 0.541675, 0.54503, 0.547945, 0.664327, 0.662047, 0.659774, 0.657534}), - GrnParams(100.25, ngraph::PartialShape {1, 2, 3, 4}, type, + GrnParams(100.25, PartialShape {1, 2, 3, 4}, type, std::vector {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24}, std::vector {0.0608299, 0.115422, 0.164091, 0.207321, 0.245662, 0.279675, 0.309889, 0.336786, 
0.360795, 0.38229, 0.401596, 0.418994, 0.790789, 0.807954, 0.820457, 0.829283, 0.835252, 0.839026, 0.841128, 0.841965, 0.841854, 0.841037, 0.839701, 0.837989f}), - GrnParams(100.25, ngraph::PartialShape {2, 2, 3, 4}, type, + GrnParams(100.25, PartialShape {2, 2, 3, 4}, type, std::vector {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48}, std::vector {0.0608299, 0.115422, 0.164091, 0.207321, 0.245662, 0.279675, 0.309889, 0.336786, 0.360795, 0.38229, 0.401596, 0.418994, @@ -103,9 +104,9 @@ std::vector generateGrnParams(const ngraph::element::Type& type) { } std::vector generateGrnCombinedParams() { - const std::vector> grnTypeParams {generateGrnParams(ngraph::element::bf16), - generateGrnParams(ngraph::element::f16), - generateGrnParams(ngraph::element::f32)}; + const std::vector> grnTypeParams {generateGrnParams(element::bf16), + generateGrnParams(element::f16), + generateGrnParams(element::f32)}; std::vector combinedParams; std::for_each(grnTypeParams.begin(), grnTypeParams.end(), [&](std::vector params) { combinedParams.insert(combinedParams.end(), params.begin(), params.end()); diff --git a/docs/template_plugin/tests/functional/op_reference/mvn.cpp b/docs/template_plugin/tests/functional/op_reference/mvn.cpp new file mode 100644 index 00000000000000..5321164807b852 --- /dev/null +++ b/docs/template_plugin/tests/functional/op_reference/mvn.cpp @@ -0,0 +1,254 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include +#include +#include +#include + +#include "base_reference_test.hpp" + +using namespace ngraph; +using namespace InferenceEngine; +using namespace reference_tests; + +// ------------------------------ V0 ------------------------------ + +struct MVN1Params { + MVN1Params(const Tensor& paramInput, const ngraph::AxisSet& paramReductionAxes, const bool paramAcrossChannels, const bool paramNormalizeVariance, + const double paramEps, const Tensor& paramExpected) + : input(paramInput), + reductionAxes(paramReductionAxes), + acrossChannels(paramAcrossChannels), + normalizeVariance(paramNormalizeVariance), + eps(paramEps), + expected(paramExpected) {} + Tensor input; + ngraph::AxisSet reductionAxes; + bool acrossChannels; + bool normalizeVariance; + double eps; + Tensor expected; +}; + +class ReferenceMVN1LayerTest : public testing::TestWithParam, public CommonReferenceTest { +public: + void SetUp() override { + auto params = GetParam(); + function = CreateFunction(params.input, params.reductionAxes, params.acrossChannels, params.normalizeVariance, params.eps); + inputData = {params.input.data}; + refOutData = {params.expected.data}; + } + static std::string getTestCaseName(const testing::TestParamInfo& obj) { + auto param = obj.param; + std::ostringstream result; + result << "shape=" << param.input.shape; + result << "_iType=" << param.input.type; + if (!param.reductionAxes.empty()) { + result << "_reductionAccess=" << CommonTestUtils::vec2str(param.reductionAxes.to_vector()); + } else { + result << "_acrossChannels=" << (param.acrossChannels ? "TRUE" : "FALSE"); + } + result << "_normalizeVariance=" << (param.normalizeVariance ? 
"TRUE" : "FALSE"); + result << "_eps=" << param.eps; + return result.str(); + } + +private: + static std::shared_ptr CreateFunction(const Tensor& input, const ngraph::AxisSet& reductionAxes, const bool acrossChannels, + const bool normalizeVariance, const double eps) { + const auto in = std::make_shared(input.type, input.shape); + auto mvn = std::make_shared(in, acrossChannels, normalizeVariance, eps); + if (!reductionAxes.empty()) { + mvn = std::make_shared(in, reductionAxes, normalizeVariance, eps); + } + return std::make_shared(NodeVector {mvn}, ParameterVector {in}); + } +}; + +TEST_P(ReferenceMVN1LayerTest, CompareWithHardcodedRefs) { + Exec(); +} + +const ngraph::AxisSet emptyReductionAxes {}; + +INSTANTIATE_TEST_SUITE_P( + smoke_MVN1_With_Hardcoded_Refs, ReferenceMVN1LayerTest, + ::testing::Values( + // across_channels=false, variance=false + MVN1Params(Tensor {{1, 3, 3, 3}, ngraph::element::f32, std::vector {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, + 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9}}, + emptyReductionAxes, + false, + false, + 1e-9, + Tensor {{1, 3, 3, 3}, ngraph::element::f32, std::vector {-4, -3, -2, -1, 0, 1, 2, 3, 4, -4, -3, -2, -1, 0, + 1, 2, 3, 4, -4, -3, -2, -1, 0, 1, 2, 3, 4}}), + // across_channels=true, variance=false + MVN1Params( + Tensor {{1, 3, 2, 2}, ngraph::element::f32, std::vector {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3}}, + emptyReductionAxes, + true, + false, + 1e-9, + Tensor {{1, 3, 2, 2}, ngraph::element::f32, std::vector {-3.25, -2.25, -1.25, -0.25, 0.75, 1.75, 2.75, 3.75, 4.75, -3.25, -2.25, -1.25}}), + // across_channels=false, variance=true + MVN1Params(Tensor {{1, 3, 3, 3}, ngraph::element::f32, std::vector {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, + 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9}}, + emptyReductionAxes, + false, + true, + 1e-9, + Tensor {{1, 3, 3, 3}, + ngraph::element::f32, + std::vector {-1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934, + -1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934, + -1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934}}), + // across_channels=true, variance=true + MVN1Params(Tensor {{1, 3, 3, 3}, ngraph::element::f32, std::vector {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, + 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9}}, + emptyReductionAxes, + true, + true, + 1e-9, + Tensor {{1, 3, 3, 3}, + ngraph::element::f32, + std::vector {-1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934, + -1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934, + -1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934}}), + // reductionAxes, variance=false + MVN1Params( + Tensor {{1, 3, 2, 2}, ngraph::element::f32, std::vector {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3}}, + {1, 2, 3}, + false, + false, + 1e-9, + Tensor {{1, 3, 2, 2}, ngraph::element::f32, std::vector {-3.25, -2.25, -1.25, -0.25, 0.75, 1.75, 2.75, 3.75, 4.75, -3.25, -2.25, -1.25}}), + // reductionAxes, variance=true + MVN1Params(Tensor {{1, 3, 3, 3}, ngraph::element::f32, std::vector {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, + 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9}}, + {2, 3}, + false, + true, + 1e-9, + Tensor {{1, 3, 3, 3}, + ngraph::element::f32, + std::vector {-1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934, + -1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 
1.161895, 1.5491934, + -1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934}})), + ReferenceMVN1LayerTest::getTestCaseName); + +// ------------------------------ V6 ------------------------------ + +struct MVN6Params { + MVN6Params(const Tensor& paramInput, const Tensor& paramReductionAxes, const bool paramNormalizeVariance, const double paramEps, + const ngraph::op::MVNEpsMode mode, const Tensor& paramExpected) + : input(paramInput), + reductionAxes(paramReductionAxes), + normalizeVariance(paramNormalizeVariance), + eps(paramEps), + epsMode(mode), + expected(paramExpected) {} + Tensor input; + Tensor reductionAxes; + bool normalizeVariance; + double eps; + ngraph::op::MVNEpsMode epsMode; + Tensor expected; +}; + +class ReferenceMVN6LayerTest : public testing::TestWithParam, public CommonReferenceTest { +public: + void SetUp() override { + auto params = GetParam(); + function = CreateFunction(params.input, params.reductionAxes, params.normalizeVariance, params.eps, params.epsMode); + inputData = {params.input.data}; + refOutData = {params.expected.data}; + } + static std::string getTestCaseName(const testing::TestParamInfo& obj) { + auto param = obj.param; + std::ostringstream result; + result << "shape=" << param.input.shape; + result << "_iType=" << param.input.type; + result << "_reductionAccess=" << CommonTestUtils::vec2str(param.reductionAxes.shape); + result << "_normalizeVariance=" << (param.normalizeVariance ? "TRUE" : "FALSE"); + result << "_eps=" << param.eps; + result << "_eps_mode=" << param.epsMode; + return result.str(); + } + +private: + static std::shared_ptr CreateFunction(const Tensor& input, const Tensor& reductionAxes, const bool normalizeVariance, const double eps, + const ngraph::op::MVNEpsMode epsMode) { + std::vector dataVector(reductionAxes.shape[0]); + const auto in = std::make_shared(input.type, input.shape); + auto mRef = as(reductionAxes.data); + IE_ASSERT(mRef); + const auto refLockMemory = mRef->rmap(); + const auto refBuffer = refLockMemory.as(); + for (size_t i = 0; i < dataVector.size(); ++i) { + dataVector[i] = refBuffer[i]; + } + const auto axes = std::make_shared(reductionAxes.type, reductionAxes.shape, dataVector); + auto mvn = std::make_shared(in, axes, normalizeVariance, eps, epsMode); + return std::make_shared(NodeVector {mvn}, ParameterVector {in}); + } +}; + +TEST_P(ReferenceMVN6LayerTest, CompareWithHardcodedRefs) { + Exec(); +} + +INSTANTIATE_TEST_SUITE_P( + smoke_MVN6_With_Hardcoded_Refs, ReferenceMVN6LayerTest, + ::testing::Values( + // variance=false, OUTSIDE_SQRT + MVN6Params(Tensor {{1, 3, 3, 3}, ngraph::element::f32, std::vector {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, + 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9}}, + Tensor {Shape {2}, ngraph::element::i64, std::vector {2, 3}}, + false, + 1e-9, + ngraph::op::MVNEpsMode::OUTSIDE_SQRT, + Tensor {{1, 3, 3, 3}, ngraph::element::f32, std::vector {-4, -3, -2, -1, 0, 1, 2, 3, 4, -4, -3, -2, -1, 0, + 1, 2, 3, 4, -4, -3, -2, -1, 0, 1, 2, 3, 4}}), + // variance=true, OUTSIDE_SQRT + MVN6Params(Tensor {{1, 3, 3, 3}, ngraph::element::f32, std::vector {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, + 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9}}, + Tensor {Shape {2}, ngraph::element::i64, std::vector {2, 3}}, + true, + 1e-9, + ngraph::op::MVNEpsMode::OUTSIDE_SQRT, + Tensor {{1, 3, 3, 3}, + ngraph::element::f32, + std::vector {-1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934, + -1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 
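// ----------------------------------------------------------------------------
// Illustration: the two MVN-6 eps modes only differ in where eps enters the
// denominator - INSIDE_SQRT uses sqrt(variance + eps), OUTSIDE_SQRT uses
// sqrt(variance) + eps. With eps = 1e-9 and variance 60/9 both agree to far
// more than the seven digits stored in these tests, which is why the
// INSIDE_SQRT and OUTSIDE_SQRT cases can share expected values.
#include <cmath>
#include <cstdio>

int main() {
    const double var = 60.0 / 9.0, eps = 1e-9;
    std::printf("inside : %.12f\n", 1.0 / std::sqrt(var + eps));    // 0.387298334621
    std::printf("outside: %.12f\n", 1.0 / (std::sqrt(var) + eps));  // 0.387298334621
    return 0;
}
// ----------------------------------------------------------------------------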
0.38729835, 0.7745967, 1.161895, 1.5491934, + -1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934}}), + // variance=true, INSIDE_SQRT + MVN6Params(Tensor {{1, 3, 3, 3}, ngraph::element::f32, std::vector {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, + 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9}}, + Tensor {Shape {2}, ngraph::element::i64, std::vector {2, 3}}, + true, + 1e-9, + ngraph::op::MVNEpsMode::INSIDE_SQRT, + Tensor {{1, 3, 3, 3}, + ngraph::element::f32, + std::vector {-1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934, + -1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934, + -1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934}}), + // variance=true, another reductionAxes, OUTSIDE_SQRT + MVN6Params(Tensor {{1, 3, 3, 3}, ngraph::element::f32, std::vector({1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, + 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9})}, + Tensor {Shape {3}, ngraph::element::i64, std::vector({1, 2, 3})}, + true, + 1e-9, + ngraph::op::MVNEpsMode::OUTSIDE_SQRT, + Tensor {{1, 3, 3, 3}, + ngraph::element::f32, + std::vector {-1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934, + -1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934, + -1.5491934, -1.161895, -0.7745967, -0.38729835, 0., 0.38729835, 0.7745967, 1.161895, 1.5491934}})), + ReferenceMVN6LayerTest::getTestCaseName); diff --git a/docs/template_plugin/tests/functional/op_reference/select.cpp b/docs/template_plugin/tests/functional/op_reference/select.cpp new file mode 100644 index 00000000000000..0cbc242c61b202 --- /dev/null +++ b/docs/template_plugin/tests/functional/op_reference/select.cpp @@ -0,0 +1,140 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include +#include +#include +#include + +#include "base_reference_test.hpp" + +using namespace reference_tests; +using namespace ngraph; +using namespace InferenceEngine; + +struct SelectParams { + template + SelectParams(const element::Type& data_type, const op::AutoBroadcastSpec& broadcast, const PartialShape& select_input_pshape, + const std::vector& select_input, const PartialShape& if_input_pshape, const std::vector& if_input, + const PartialShape& else_input_pshape, const std::vector& else_input, const std::vector& expected_output) + : data_type(data_type), + broadcast(broadcast), + select_input_pshape(select_input_pshape), + select_input(CreateBlob(element::boolean, select_input)), + if_input_pshape(if_input_pshape), + if_input(CreateBlob(data_type, if_input)), + else_input_pshape(else_input_pshape), + else_input(CreateBlob(data_type, else_input)), + expected_output(CreateBlob(data_type, expected_output)) {} + + element::Type data_type; + op::AutoBroadcastSpec broadcast; + PartialShape select_input_pshape; + Blob::Ptr select_input; + PartialShape if_input_pshape; + Blob::Ptr if_input; + PartialShape else_input_pshape; + Blob::Ptr else_input; + Blob::Ptr expected_output; +}; + +class ReferenceSelectLayerTest : public testing::TestWithParam, public CommonReferenceTest { +public: + void SetUp() override { + auto params = GetParam(); + function = CreateFunction(params.data_type, params.broadcast, params.select_input_pshape, params.if_input_pshape, params.else_input_pshape); + inputData = {params.select_input, params.if_input, params.else_input}; + refOutData = 
{params.expected_output};
+    }
+    static std::string getTestCaseName(const testing::TestParamInfo& obj) {
+        auto param = obj.param;
+        std::ostringstream result;
+        result << "data_type=" << param.data_type << "_";
+        result << "broadcast=" << param.broadcast.m_type << "_";
+        result << "select_shape=" << param.select_input_pshape << "_";
+        result << "if_shape=" << param.if_input_pshape << "_";
+        result << "else_shape=" << param.else_input_pshape;
+        return result.str();
+    }
+
+private:
+    static std::shared_ptr CreateFunction(const element::Type& data_type, const op::AutoBroadcastSpec& broadcast,
+                                          const PartialShape& select_pshape, const PartialShape& if_pshape, const PartialShape& else_pshape) {
+        auto A = std::make_shared(element::boolean, select_pshape);
+        auto B = std::make_shared(data_type, if_pshape);
+        auto C = std::make_shared(data_type, else_pshape);
+        return std::make_shared(std::make_shared(A, B, C, broadcast), ParameterVector {A, B, C});
+    }
+};
+
+TEST_P(ReferenceSelectLayerTest, CompareWithHardcodedRefs) {
+    Exec();
+}
+
+INSTANTIATE_TEST_SUITE_P(smoke_Select_With_Hardcoded_Refs, ReferenceSelectLayerTest,
+                         ::testing::Values(
+                             // fp32, no broadcasting
+                             SelectParams(element::f32,                                  // if/else/output data type
+                                          op::AutoBroadcastType::NONE,                   // broadcasting type
+                                          PartialShape {2, 2, 2},                        // select shape
+                                          std::vector {0, 1, 1, 0, 0, 1, 0, 1},          // select data
+                                          PartialShape {2, 2, 2},                        // if shape
+                                          std::vector {1, 2, 3, 4, 5, 6, 7, 8},          // if data
+                                          PartialShape {2, 2, 2},                        // else shape
+                                          std::vector {11, 12, 13, 14, 15, 16, 17, 18},  // else data
+                                          std::vector {11, 2, 3, 14, 15, 6, 17, 8}),     // expected output data
+                             // i32, no broadcasting
+                             SelectParams(element::i32,                                  // if/else/output data type
+                                          op::AutoBroadcastType::NONE,                   // broadcasting type
+                                          PartialShape {2, 2, 2},                        // select shape
+                                          std::vector {0, 1, 1, 0, 0, 1, 0, 1},          // select data
+                                          PartialShape {2, 2, 2},                        // if shape
+                                          std::vector {1, 2, 3, 4, 5, 6, 7, 8},          // if data
+                                          PartialShape {2, 2, 2},                        // else shape
+                                          std::vector {11, 12, 13, 14, 15, 16, 17, 18},  // else data
+                                          std::vector {11, 2, 3, 14, 15, 6, 17, 8}),     // expected output data
+                             // fp32, numpy broadcasting
+                             SelectParams(element::f32,                                  // if/else/output data type
+                                          op::AutoBroadcastType::NUMPY,                  // broadcasting type
+                                          PartialShape {4},                              // select shape
+                                          std::vector {0, 1, 1, 0},                      // select data
+                                          PartialShape {4},                              // if shape
+                                          std::vector {1, 2, 3, 4},                      // if data
+                                          PartialShape {2, 4},                           // else shape
+                                          std::vector {11, 12, 13, 14, 15, 16, 17, 18},  // else data
+                                          std::vector {11, 2, 3, 14, 15, 2, 3, 18}),     // expected output data
+                             // i32, numpy broadcasting
+                             SelectParams(element::i32,                                  // if/else/output data type
+                                          op::AutoBroadcastType::NUMPY,                  // broadcasting type
+                                          PartialShape {4},                              // select shape
+                                          std::vector {0, 1, 1, 0},                      // select data
+                                          PartialShape {4},                              // if shape
+                                          std::vector {1, 2, 3, 4},                      // if data
+                                          PartialShape {2, 4},                           // else shape
+                                          std::vector {11, 12, 13, 14, 15, 16, 17, 18},  // else data
+                                          std::vector {11, 2, 3, 14, 15, 2, 3, 18}),     // expected output data
+                             // fp32, pdpd broadcasting
+                             SelectParams(element::f32,                                  // if/else/output data type
+                                          {op::AutoBroadcastType::PDPD, -1},             // broadcasting type
+                                          PartialShape {2, 4},                           // select shape
+                                          std::vector {0, 0, 0, 0, 0, 1, 1, 1},          // select data
+                                          PartialShape {2, 4},                           // if shape
+                                          std::vector {1, 2, 3, 4, 5, 6, 7, 8},          // if data
+                                          PartialShape {4},                              // else shape
+                                          std::vector {11, 12, 13, 14},                  // else data
+                                          std::vector {11, 12, 13, 14, 11, 6, 7, 8}),    // expected output data
+                             // i32, pdpd broadcasting
+
SelectParams(element::i32, // if/else/output data type + {op::AutoBroadcastType::PDPD, -1}, // broadcasting type + PartialShape {2, 4}, // select shape + std::vector {0, 0, 0, 0, 0, 1, 1, 1}, // select data + PartialShape {2, 4}, // if shape + std::vector {1, 2, 3, 4, 5, 6, 7, 8}, // if data + PartialShape {4}, // else shape + std::vector {11, 12, 13, 14}, // else data + std::vector {11, 12, 13, 14, 11, 6, 7, 8})), // expected output data + ReferenceSelectLayerTest::getTestCaseName); diff --git a/docs/template_plugin/tests/functional/op_reference/sign.cpp b/docs/template_plugin/tests/functional/op_reference/sign.cpp new file mode 100644 index 00000000000000..ca1505cea1368e --- /dev/null +++ b/docs/template_plugin/tests/functional/op_reference/sign.cpp @@ -0,0 +1,81 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include +#include +#include +#include + +#include "base_reference_test.hpp" + +using namespace reference_tests; +using namespace ngraph; +using namespace InferenceEngine; + +struct SignParams { + template + SignParams(const PartialShape& shape, const element::Type& iType, const element::Type& oType, const std::vector& iValues, + const std::vector& oValues) + : pshape(shape), inType(iType), outType(oType), inputData(CreateBlob(iType, iValues)), refData(CreateBlob(oType, oValues)) {} + PartialShape pshape; + element::Type inType; + element::Type outType; + Blob::Ptr inputData; + Blob::Ptr refData; +}; + +class ReferenceSignLayerTest : public testing::TestWithParam, public CommonReferenceTest { +public: + void SetUp() override { + auto params = GetParam(); + function = CreateFunction(params.pshape, params.inType); + inputData = {params.inputData}; + refOutData = {params.refData}; + } + static std::string getTestCaseName(const testing::TestParamInfo& obj) { + auto param = obj.param; + std::ostringstream result; + result << "shape=" << param.pshape << "_"; + result << "iType=" << param.inType << "_"; + result << "oType=" << param.outType; + return result.str(); + } + +private: + static std::shared_ptr CreateFunction(const PartialShape& input_shape, const element::Type& input_type) { + const auto in = std::make_shared(input_type, input_shape); + const auto sign = std::make_shared(in); + return std::make_shared(NodeVector {sign}, ParameterVector {in}); + } +}; + +TEST_P(ReferenceSignLayerTest, CompareWithHardcodedRefs) { + Exec(); +} + +INSTANTIATE_TEST_SUITE_P( + smoke_Sign_With_Hardcoded_Refs, ReferenceSignLayerTest, + ::testing::Values( + SignParams(PartialShape {6}, element::f32, element::f32, + std::vector {1, -2, 0, -4.8f, 4.8f, -0.0f}, + std::vector {1, -1, 0, -1, 1, 0}), + SignParams(PartialShape {6}, element::f16, element::f16, + std::vector {1, -2, 0, -4.8f, 4.8f, -0.0f}, + std::vector {1, -1, 0, -1, 1, 0}), + SignParams(PartialShape {6}, element::u64, element::u64, + std::vector {1, 2, 0, 4, 4, 0}, + std::vector {1, 1, 0, 1, 1, 0}), + SignParams(PartialShape {6}, element::u32, element::u32, + std::vector {1, 2, 0, 4, 4, 0}, + std::vector {1, 1, 0, 1, 1, 0}), + SignParams(PartialShape {6}, element::i32, element::i32, + std::vector {1, -2, 0, -4, 4, -0}, + std::vector {1, -1, 0, -1, 1, 0}), + SignParams(PartialShape {6}, element::i64, element::i64, + std::vector {1, -2, 0, -4, 4, -0}, + std::vector {1, -1, 0, -1, 1, 0})), + ReferenceSignLayerTest::getTestCaseName); diff --git a/inference-engine/cmake/ie_parallel.cmake b/inference-engine/cmake/ie_parallel.cmake index d33a73a5fa760d..eb844d25b76e02 100644 --- 
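// ----------------------------------------------------------------------------
// Illustration, a standalone sketch of the NUMPY-broadcast Select cases above:
// the {4}-shaped condition and "if" inputs are repeated across the leading
// dimension of the {2, 4} "else" input, giving {11, 2, 3, 14, 15, 2, 3, 18}.
#include <cstdio>

int main() {
    const bool cond[4] = {false, true, true, false};
    const int if_v[4] = {1, 2, 3, 4};
    const int else_v[2][4] = {{11, 12, 13, 14}, {15, 16, 17, 18}};
    for (int r = 0; r < 2; ++r)
        for (int c = 0; c < 4; ++c)
            std::printf("%d ", cond[c] ? if_v[c] : else_v[r][c]);  // 11 2 3 14 15 2 3 18
    return 0;
}
// ----------------------------------------------------------------------------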
a/inference-engine/cmake/ie_parallel.cmake +++ b/inference-engine/cmake/ie_parallel.cmake @@ -29,6 +29,7 @@ function(set_ie_threading_interface_for TARGET_NAME) set(TBB_IMPORTED_TARGETS ${TBB_IMPORTED_TARGETS} PARENT_SCOPE) set(TBB_VERSION ${TBB_VERSION} PARENT_SCOPE) if (NOT TBB_FOUND) + set(THREADING "SEQ" PARENT_SCOPE) ext_message(WARNING "TBB was not found by the configured TBB_DIR/TBBROOT path.\ SEQ method will be used.") endif () @@ -95,6 +96,7 @@ function(set_ie_threading_interface_for TARGET_NAME) set(IE_THREAD_DEFINE "IE_THREAD_TBB") ie_target_link_libraries(${TARGET_NAME} ${LINK_TYPE} ${TBB_IMPORTED_TARGETS}) else () + set(THREADING "SEQ" PARENT_SCOPE) ext_message(WARNING "TBB was not found by the configured TBB_DIR path.\ SEQ method will be used for ${TARGET_NAME}") endif () @@ -133,6 +135,7 @@ function(set_ie_threading_interface_for TARGET_NAME) if (NOT OMP_LIBRARIES_RELEASE) ext_message(WARNING "Intel OpenMP not found. Intel OpenMP support will be disabled. ${IE_THREAD_DEFINE} is defined") + set(THREADING "SEQ" PARENT_SCOPE) else () set(IE_THREAD_DEFINE "IE_THREAD_OMP") diff --git a/inference-engine/cmake/vpu_dependencies.cmake b/inference-engine/cmake/vpu_dependencies.cmake index d134c29171802c..e6ec3799a3ccf8 100644 --- a/inference-engine/cmake/vpu_dependencies.cmake +++ b/inference-engine/cmake/vpu_dependencies.cmake @@ -6,14 +6,14 @@ include_guard(GLOBAL) set(VPU_SUPPORTED_FIRMWARES usb-ma2x8x pcie-ma2x8x) set(VPU_SUPPORTED_FIRMWARES_HASH - "420b300d193f7fcfe7e3f9bbec6c247d65b784a500b5cd2effb7cb1ec6e1b209" - "bfe3caf270b168b9de18ef88f04bde3907d7d12a679f1fa7cc580423c35db637") + "54a732b5fb17a0124652bc5113fa628c718a5af40621bca309471cb5ffd9271b" + "5750b2831c77ef54b8e243d3840c5ed1c9509681d55aee7e369d558cef628735") # # Default packages # -set(FIRMWARE_PACKAGE_VERSION 1688) +set(FIRMWARE_PACKAGE_VERSION 1717) set(VPU_CLC_MA2X8X_VERSION "movi-cltools-20.09.2") # diff --git a/inference-engine/ie_bridges/python/CMakeLists.txt b/inference-engine/ie_bridges/python/CMakeLists.txt index 7b93a4291a2d3a..a88b1017a124f4 100644 --- a/inference-engine/ie_bridges/python/CMakeLists.txt +++ b/inference-engine/ie_bridges/python/CMakeLists.txt @@ -43,12 +43,14 @@ else() endif() if(ENABLE_CONDA_FOLDER) + set(PYTHON_COMPONENT conda_${PYTHON_VERSION}) if(WIN32) set(PYTHON_BRIDGE_OUTPUT_DIRECTORY ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/$/python_api/Conda/${PYTHON_VERSION}/openvino) else() set(PYTHON_BRIDGE_OUTPUT_DIRECTORY ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/python_api/Conda/${PYTHON_VERSION}/openvino) endif() else() + set(PYTHON_COMPONENT ${PYTHON_VERSION}) if(WIN32) set(PYTHON_BRIDGE_OUTPUT_DIRECTORY ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/$/python_api/${PYTHON_VERSION}/openvino) else() @@ -56,6 +58,13 @@ else() endif() endif() +function(ov_python_disable_intel_warnings target) + if(UNIX AND CMAKE_CXX_COMPILER_ID STREQUAL "Intel") + # 1292: unknown attribute "fallthrough" + target_compile_options(${target} PRIVATE -diag-disable=1292) + endif() +endfunction() + set (PYTHON_BRIDGE_SRC_ROOT ${CMAKE_CURRENT_SOURCE_DIR}) add_subdirectory (src/openvino/inference_engine) add_subdirectory (src/openvino/offline_transformations) @@ -74,19 +83,19 @@ endif() # install -ie_cpack_add_component(${PYTHON_VERSION}) +ie_cpack_add_component(${PYTHON_COMPONENT}) install(FILES requirements.txt DESTINATION ${PYTHON_BRIDGE_CPACK_PATH}/${PYTHON_VERSION} - COMPONENT ${PYTHON_VERSION}) + COMPONENT ${PYTHON_COMPONENT}) install(FILES requirements.txt DESTINATION ${PYTHON_BRIDGE_CPACK_PATH} - COMPONENT ${PYTHON_VERSION}) + COMPONENT 
${PYTHON_COMPONENT}) install(PROGRAMS src/openvino/__init__.py DESTINATION ${PYTHON_BRIDGE_CPACK_PATH}/${PYTHON_VERSION}/openvino - COMPONENT ${PYTHON_VERSION}) + COMPONENT ${PYTHON_COMPONENT}) # install Python samples @@ -96,4 +105,4 @@ install(DIRECTORY sample/ DESTINATION ${IE_CPACK_IE_DIR}/samples/python COMPONENT python_samples) -ie_cpack(${PYTHON_VERSION} python_samples) +ie_cpack(${PYTHON_COMPONENT} python_samples) diff --git a/inference-engine/ie_bridges/python/src/openvino/inference_engine/CMakeLists.txt b/inference-engine/ie_bridges/python/src/openvino/inference_engine/CMakeLists.txt index a236db836d60ae..cfab4f2d907f28 100644 --- a/inference-engine/ie_bridges/python/src/openvino/inference_engine/CMakeLists.txt +++ b/inference-engine/ie_bridges/python/src/openvino/inference_engine/CMakeLists.txt @@ -20,13 +20,15 @@ set_source_files_properties(${PYX_SOURCES} PROPERTIES CYTHON_IS_CXX ON) # create target cython_add_module(${TARGET_NAME} ${SOURCES}) -set(INSTALLED_TARGETS ${TARGET_NAME}) +ov_python_disable_intel_warnings(${TARGET_NAME}) +set(INSTALLED_TARGETS ${TARGET_NAME}) list(REMOVE_ITEM PYX_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/ie_api.pyx") foreach(PYX_FILE IN LISTS PYX_SOURCES) get_filename_component(PYX_NAME "${PYX_FILE}" NAME_WE) cython_add_module(${PYX_NAME} ${PYX_FILE}) + ov_python_disable_intel_warnings(${PYX_NAME}) add_dependencies(${TARGET_NAME} ${PYX_NAME}) target_include_directories(${PYX_NAME} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}") target_link_libraries(${PYX_NAME} PRIVATE ${InferenceEngine_LIBRARIES}) @@ -70,12 +72,12 @@ add_custom_command(TARGET ${TARGET_NAME} # install install(TARGETS ${INSTALLED_TARGETS} - RUNTIME DESTINATION ${PYTHON_BRIDGE_CPACK_PATH}/${PYTHON_VERSION}/openvino/inference_engine COMPONENT ${PYTHON_VERSION} - LIBRARY DESTINATION ${PYTHON_BRIDGE_CPACK_PATH}/${PYTHON_VERSION}/openvino/inference_engine COMPONENT ${PYTHON_VERSION}) + RUNTIME DESTINATION ${PYTHON_BRIDGE_CPACK_PATH}/${PYTHON_VERSION}/openvino/inference_engine COMPONENT ${PYTHON_COMPONENT} + LIBRARY DESTINATION ${PYTHON_BRIDGE_CPACK_PATH}/${PYTHON_VERSION}/openvino/inference_engine COMPONENT ${PYTHON_COMPONENT}) install(PROGRAMS __init__.py DESTINATION ${PYTHON_BRIDGE_CPACK_PATH}/${PYTHON_VERSION}/openvino/inference_engine - COMPONENT ${PYTHON_VERSION}) + COMPONENT ${PYTHON_COMPONENT}) add_clang_format_target(${TARGET_NAME}_clang FOR_TARGETS ${TARGET_NAME} EXCLUDE_PATTERNS ".*\\.cxx;.*\\.pxd;.*\\.pyx") diff --git a/inference-engine/ie_bridges/python/src/openvino/offline_transformations/CMakeLists.txt b/inference-engine/ie_bridges/python/src/openvino/offline_transformations/CMakeLists.txt index 27c9e7bf898257..512b1662be525c 100644 --- a/inference-engine/ie_bridges/python/src/openvino/offline_transformations/CMakeLists.txt +++ b/inference-engine/ie_bridges/python/src/openvino/offline_transformations/CMakeLists.txt @@ -20,7 +20,9 @@ set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/offline_transformations_ # create target cython_add_module(${TARGET_NAME} ${SOURCES}) + add_dependencies(${TARGET_NAME} ie_api) +ov_python_disable_intel_warnings(${TARGET_NAME}) if(COMMAND ie_add_vs_version_file) ie_add_vs_version_file(NAME ${TARGET_NAME} @@ -54,12 +56,12 @@ add_custom_command(TARGET ${TARGET_NAME} # install # TODO: use ${PYTHON_VERSION}_dev component below -# ie_cpack_add_component(${PYTHON_VERSION}_dev DEPENDS ${PYTHON_VERSION}) +# ie_cpack_add_component(${PYTHON_VERSION}_dev DEPENDS ${PYTHON_COMPONENT}) install(TARGETS ${TARGET_NAME} - RUNTIME DESTINATION 
python/${PYTHON_VERSION}/openvino/offline_transformations COMPONENT ${PYTHON_VERSION} - LIBRARY DESTINATION python/${PYTHON_VERSION}/openvino/offline_transformations COMPONENT ${PYTHON_VERSION}) + RUNTIME DESTINATION python/${PYTHON_VERSION}/openvino/offline_transformations COMPONENT ${PYTHON_COMPONENT} + LIBRARY DESTINATION python/${PYTHON_VERSION}/openvino/offline_transformations COMPONENT ${PYTHON_COMPONENT}) install(PROGRAMS __init__.py DESTINATION python/${PYTHON_VERSION}/openvino/offline_transformations - COMPONENT ${PYTHON_VERSION}) + COMPONENT ${PYTHON_COMPONENT}) diff --git a/inference-engine/ie_bridges/python/src/openvino/test_utils/CMakeLists.txt b/inference-engine/ie_bridges/python/src/openvino/test_utils/CMakeLists.txt index 8367f941d9f793..9d3e1e0ffc082d 100644 --- a/inference-engine/ie_bridges/python/src/openvino/test_utils/CMakeLists.txt +++ b/inference-engine/ie_bridges/python/src/openvino/test_utils/CMakeLists.txt @@ -20,7 +20,9 @@ set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/test_utils_api.pyx # create target cython_add_module(${TARGET_NAME} ${SOURCES}) + add_dependencies(${TARGET_NAME} ie_api) +ov_python_disable_intel_warnings(${TARGET_NAME}) if(COMMAND ie_add_vs_version_file) ie_add_vs_version_file(NAME ${TARGET_NAME} diff --git a/inference-engine/include/gpu/gpu_ocl_wrapper.hpp b/inference-engine/include/gpu/gpu_ocl_wrapper.hpp index 85ca2521a76346..496f0974ad51e1 100644 --- a/inference-engine/include/gpu/gpu_ocl_wrapper.hpp +++ b/inference-engine/include/gpu/gpu_ocl_wrapper.hpp @@ -39,7 +39,7 @@ # pragma GCC system_header #endif -#include +#include #ifdef __GNUC__ # pragma GCC diagnostic pop diff --git a/inference-engine/samples/CMakeLists.txt b/inference-engine/samples/CMakeLists.txt index aef11e16f47bf8..c06336ec8f4e47 100644 --- a/inference-engine/samples/CMakeLists.txt +++ b/inference-engine/samples/CMakeLists.txt @@ -76,6 +76,10 @@ else() set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror") #treating warnings as errors endif() + if (CMAKE_CXX_COMPILER_ID STREQUAL "Intel") + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -diag-disable:177") + endif() + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall") set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall") if (APPLE) @@ -135,10 +139,6 @@ if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/cnpy") add_subdirectory(thirdparty/cnpy EXCLUDE_FROM_ALL) endif() -if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall") -endif() - if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/common/utils") add_subdirectory(common/utils) endif() diff --git a/inference-engine/samples/benchmark_app/README.md b/inference-engine/samples/benchmark_app/README.md index 7c61bc570d518e..2d5076a60c613c 100644 --- a/inference-engine/samples/benchmark_app/README.md +++ b/inference-engine/samples/benchmark_app/README.md @@ -95,6 +95,7 @@ Options: -layout Optional. Prompts how network layouts should be treated by application. For example, "input1[NCHW],input2[NC]" or "[NCHW]" in case of one input size. -cache_dir "" Optional. Enables caching of loaded models to specified directory. -load_from_file Optional. Loads model from file directly without ReadNetwork. + -latency_percentile Optional. Defines the percentile to be reported in latency metric. The valid range is [1, 100]. The default value is 50 (median). CPU-specific performance options: -nstreams "" Optional. 
Number of streams to use for inference on the CPU, GPU or MYRIAD devices diff --git a/inference-engine/samples/benchmark_app/benchmark_app.hpp b/inference-engine/samples/benchmark_app/benchmark_app.hpp index af18c908e31b96..a369c2f1055ce1 100644 --- a/inference-engine/samples/benchmark_app/benchmark_app.hpp +++ b/inference-engine/samples/benchmark_app/benchmark_app.hpp @@ -56,6 +56,10 @@ static const char infer_num_streams_message[] = "Optional. Number of streams to "Also, using nstreams>1 is inherently throughput-oriented option, " "while for the best-latency estimations the number of streams should be set to 1."; +/// @brief message for latency percentile settings +static const char infer_latency_percentile_message[] = + "Optional. Defines the percentile to be reported in latency metric. The valid range is [1, 100]. The default value is 50 (median)."; + /// @brief message for enforcing of BF16 execution where it is possible static const char enforce_bf16_message[] = "Optional. By default floating point operations execution in bfloat16 precision are enforced " "if supported by platform.\n" @@ -189,6 +193,9 @@ DEFINE_uint32(nthreads, 0, infer_num_threads_message); /// @brief Number of streams to use for inference on the CPU (also affects Hetero cases) DEFINE_string(nstreams, "", infer_num_streams_message); +/// @brief The percentile which will be reported in latency metric +DEFINE_uint32(latency_percentile, 50, infer_latency_percentile_message); + /// @brief Enforces bf16 execution with bfloat16 precision on systems having this capability DEFINE_bool(enforcebf16, false, enforce_bf16_message); @@ -278,6 +285,7 @@ static void showUsage() { std::cout << " -layout " << layout_message << std::endl; std::cout << " -cache_dir \"\" " << cache_dir_message << std::endl; std::cout << " -load_from_file " << load_from_file_message << std::endl; + std::cout << " -latency_percentile " << infer_latency_percentile_message << std::endl; std::cout << std::endl << " device-specific performance options:" << std::endl; std::cout << " -nstreams \"\" " << infer_num_streams_message << std::endl; std::cout << " -nthreads \"\" " << infer_num_threads_message << std::endl; diff --git a/inference-engine/samples/benchmark_app/main.cpp b/inference-engine/samples/benchmark_app/main.cpp index 2a5252ba443a85..8df3bc2f8e400b 100644 --- a/inference-engine/samples/benchmark_app/main.cpp +++ b/inference-engine/samples/benchmark_app/main.cpp @@ -52,6 +52,10 @@ bool ParseAndCheckCommandLine(int argc, char* argv[]) { throw std::logic_error("Model is required but not set. Please set -m option."); } + if (FLAGS_latency_percentile > 100 || FLAGS_latency_percentile < 1) { + showUsage(); + throw std::logic_error("The percentile value is incorrect. The applicable values range is [1, 100]."); + } if (FLAGS_api != "async" && FLAGS_api != "sync") { throw std::logic_error("Incorrect API. Please set -api option to `sync` or `async` value."); } @@ -100,11 +104,10 @@ static void next_step(const std::string additional_info = "") { } template -T getMedianValue(const std::vector& vec) { +T getMedianValue(const std::vector& vec, std::size_t percentile) { std::vector sortedVec(vec); std::sort(sortedVec.begin(), sortedVec.end()); - return (sortedVec.size() % 2 != 0) ? 
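// ----------------------------------------------------------------------------
// Illustration, a standalone sanity check of percentile selection on a sorted
// latency vector; the index is clamped so that percentile=100 (or fewer than
// 100 samples) cannot step outside the vector - the same clamp used in
// getMedianValue() below.
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <vector>

static double percentileOf(std::vector<double> v, std::size_t percentile) {
    std::sort(v.begin(), v.end());
    return v[std::min(v.size() - 1, v.size() * percentile / 100)];
}

int main() {
    const std::vector<double> latencies {4.0, 8.0, 15.0, 16.0, 23.0, 42.0};
    assert(percentileOf(latencies, 50) == 16.0);   // index 6 * 50 / 100 = 3
    assert(percentileOf(latencies, 100) == 42.0);  // index clamped to 5
    return 0;
}
// ----------------------------------------------------------------------------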
sortedVec[sortedVec.size() / 2ULL]
-                                     : (sortedVec[sortedVec.size() / 2ULL] + sortedVec[sortedVec.size() / 2ULL - 1ULL]) / static_cast(2.0);
+    return sortedVec[std::min(sortedVec.size() - 1, sortedVec.size() * percentile / 100)];
 }

 /**
@@ -624,7 +627,7 @@ int main(int argc, char* argv[]) {
         // wait the latest inference executions
         inferRequestsQueue.waitAll();
-        double latency = getMedianValue(inferRequestsQueue.getLatencies());
+        double latency = getMedianValue(inferRequestsQueue.getLatencies(), FLAGS_latency_percentile);
         double totalDuration = inferRequestsQueue.getDurationInMilliseconds();
         double fps = (FLAGS_api == "sync") ? batchSize * 1000.0 / latency : batchSize * 1000.0 * iteration / totalDuration;
@@ -634,8 +637,14 @@
                                           {"total number of iterations", std::to_string(iteration)},
                                       });
         if (device_name.find("MULTI") == std::string::npos) {
+            std::string latency_label;
+            if (FLAGS_latency_percentile == 50) {
+                latency_label = "latency (ms)";
+            } else {
+                latency_label = "latency (" + std::to_string(FLAGS_latency_percentile) + " percentile) (ms)";
+            }
             statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS, {
-                {"latency (ms)", double_to_string(latency)},
+                {latency_label, double_to_string(latency)},
             });
         }
         statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS, {{"throughput", double_to_string(fps)}});
@@ -684,8 +693,15 @@
         std::cout << "Count: " << iteration << " iterations" << std::endl;
         std::cout << "Duration: " << double_to_string(totalDuration) << " ms" << std::endl;
-        if (device_name.find("MULTI") == std::string::npos)
-            std::cout << "Latency: " << double_to_string(latency) << " ms" << std::endl;
+        if (device_name.find("MULTI") == std::string::npos) {
+            std::cout << "Latency";
+            if (FLAGS_latency_percentile == 50) {
+                std::cout << ": ";
+            } else {
+                std::cout << " (" << FLAGS_latency_percentile << " percentile): ";
+            }
+            std::cout << double_to_string(latency) << " ms" << std::endl;
+        }
         std::cout << "Throughput: " << double_to_string(fps) << " FPS" << std::endl;
     } catch (const std::exception& ex) {
         slog::err << ex.what() << slog::endl;
diff --git a/inference-engine/src/cldnn_engine/CMakeLists.txt b/inference-engine/src/cldnn_engine/CMakeLists.txt
index e292228c73f664..46dfd5e9fce858 100644
--- a/inference-engine/src/cldnn_engine/CMakeLists.txt
+++ b/inference-engine/src/cldnn_engine/CMakeLists.txt
@@ -12,7 +12,7 @@ if(CMAKE_COMPILER_IS_GNUCC)
     endif()
 endif()

-if(GPU_DEBUG_CONFIG)
+if(ENABLE_GPU_DEBUG_CAPS)
     add_definitions(-DGPU_DEBUG_CONFIG=1)
 endif()
diff --git a/inference-engine/src/cldnn_engine/cldnn_engine.cpp b/inference-engine/src/cldnn_engine/cldnn_engine.cpp
index 078a68c67843b6..206c50c93c857a 100644
--- a/inference-engine/src/cldnn_engine/cldnn_engine.cpp
+++ b/inference-engine/src/cldnn_engine/cldnn_engine.cpp
@@ -70,9 +70,12 @@
 #include
 #include
 #include
-#include
+#include
 #include
+#include
+#include
 #include
+#include
 #include
 #include

@@ -151,10 +154,12 @@ InferenceEngine::CNNNetwork clDNNEngine::CloneAndTransformNetwork(const Inferenc
     OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::TransformNetwork");
     auto nGraphFunc = clonedNetwork.getFunction();
+    using const_node_ptr = const std::shared_ptr;
+
     bool enableInt8;
     {
         ngraph::pass::Manager manager;
-        enableInt8 = config.enableInt8 && ngraph::pass::low_precision::LowPrecisionTransformer::isFunctionQuantized(nGraphFunc);
+        enableInt8 = config.enableInt8 && ngraph::pass::low_precision::LowPrecision::isFunctionQuantized(nGraphFunc);
        if
(enableInt8) { manager.register_pass( std::vector{ ngraph::element::i8, ngraph::element::u8, ngraph::element::i4, ngraph::element::u4 }); @@ -208,8 +213,6 @@ InferenceEngine::CNNNetwork clDNNEngine::CloneAndTransformNetwork(const Inferenc auto pass_config = manager.get_pass_config(); - using const_node_ptr = const std::shared_ptr; - // SpaceToDepth/DepthToSpace node implementation supports only equal input/output tensors with rank <= 5 pass_config->set_callback( @@ -391,28 +394,78 @@ InferenceEngine::CNNNetwork clDNNEngine::CloneAndTransformNetwork(const Inferenc if (!config.enable_fp16_for_quantized_models) { manager.register_pass(precisions_array {{ ngraph::element::f16, ngraph::element::f32 }}); } - auto lptPrerequisites = manager.register_pass(); - const std::vector supportedTypes = { ngraph::element::i8, ngraph::element::u8 }; - lptPrerequisites->add_matcher(supportedTypes); - lptPrerequisites->add_matcher(supportedTypes); - lptPrerequisites->add_matcher(); - manager.run_passes(nGraphFunc); - auto params = LayerTransformation::Params(true, // updatePrecisions - LayerTransformation::QuantizedTensorAlignment::UpdateLevel, // quantizedTensorAlignmentOnActivations - LayerTransformation::QuantizedTensorAlignment::None, // quantizedTensorAlignmentOnWeights - true); // supportAsymmetricQuantization - LowPrecisionTransformer transformer(LowPrecisionTransformer::getAllTransformations(params) - .add(LayerTransformation::Params(params) - .setSupportAsymmetricQuantization(false) - .setSupport3DTensorOnActivations(false)) - .add(LayerTransformation::Params(params) - .setSupportAsymmetricQuantization(false) - .setDeconvolutionSpecificChannelsRatio(true)) - // INT8 StridedSlice not supported - .remove()); - - transformer.transform(nGraphFunc); + auto supportedPrecisions = std::vector({ + OperationPrecisionRestriction::create({ + {0, {ngraph::element::u8, ngraph::element::i8}}, + {1, {ngraph::element::i8}}, + }), + OperationPrecisionRestriction::create({ + {0, {ngraph::element::u8, ngraph::element::i8}}, + {1, {ngraph::element::i8}} + }), + OperationPrecisionRestriction::create({ + {0, {ngraph::element::u8, ngraph::element::i8}}, + {1, {ngraph::element::i8}} + }), + OperationPrecisionRestriction::create({}) + }); + + auto perTensorQuantization = std::vector({ + OperationPerTensorQuantizationRestriction::create({0}), + OperationPerTensorQuantizationRestriction::create({0}), + }); + + ngraph::pass::Manager lptManager; + + auto lptPassConfig = lptManager.get_pass_config(); + lptPassConfig->disable(); + lptPassConfig->set_callback([](const_node_ptr& node) -> bool { + if (const auto mulitply = std::dynamic_pointer_cast(node)) { + return !MultiplyToGroupConvolutionTransformation::canBeTransformedToGroupConvolution(mulitply); + } + return false; + }); + lptPassConfig->set_callback([](const_node_ptr& node) -> bool { + auto fillStaticChannel = [](const ngraph::PartialShape& shape, size_t& channel) -> bool { + const auto rank = shape.rank(); + if (rank.is_dynamic()) { + return false; + } + if (rank.get_length() < 2ul) { + return false; + } + const auto dimension = shape[1]; + if (dimension.is_dynamic()) { + return false; + } + channel = dimension.get_length(); + return true; + }; + + size_t inputChannels; + if (!fillStaticChannel(node->get_input_partial_shape(0), inputChannels)) { + return true; + } + + size_t outputChannels; + if (!fillStaticChannel(node->get_output_partial_shape(0), outputChannels)) { + return true; + } + + + if ((inputChannels % 4 != 0) || (outputChannels % 16 != 0)) { + return true; + } + 
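// ----------------------------------------------------------------------------
// Illustration of the channel-alignment guard above: returning true keeps the
// operation in floating point, so (assuming that is the intent of the checks)
// a node only stays on the int8 path when its static input channel count is a
// multiple of 4 and its output channel count a multiple of 16.
#include <cstdio>

int main() {
    const unsigned inputChannels = 8, outputChannels = 24;
    const bool skipQuantization = (inputChannels % 4 != 0) || (outputChannels % 16 != 0);
    std::printf("%s\n", skipQuantization ? "keep fp precision" : "eligible for int8");  // keep fp precision
    return 0;
}
// ----------------------------------------------------------------------------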
+        return LayerTransformation::isAsymmetricQuantization(node) || WeightableLayerTransformation::isAsymmetricOnWeights(node);
+    });
+    lptPassConfig->set_callback([](const_node_ptr& node) -> bool {
+        return MatMulTransformation::is3DTensorOnActivations(node);
+    });
+
+    lptManager.register_pass(supportedPrecisions, perTensorQuantization);
+    lptManager.run_passes(nGraphFunc);
 }
 {
diff --git a/inference-engine/src/cldnn_engine/cldnn_primitives_list.hpp b/inference-engine/src/cldnn_engine/cldnn_primitives_list.hpp
index e562447189b6c7..0c0ddf7e637050 100644
--- a/inference-engine/src/cldnn_engine/cldnn_primitives_list.hpp
+++ b/inference-engine/src/cldnn_engine/cldnn_primitives_list.hpp
@@ -194,16 +194,17 @@ REGISTER_FACTORY(v5, LSTMSequence);
 //REGISTER_FACTORY(v5, NonMaxSuppression); Supported via v5 -> v5 internal conversion
 REGISTER_FACTORY(v5, Round);
 REGISTER_FACTORY(v5, GatherND);
+REGISTER_FACTORY(v5, Loop);

 // ----------------------------- Unsupported v5 ops ----------------------------- //
 // REGISTER_FACTORY(v5, BatchNormInference);
 // REGISTER_FACTORY(v5, GRUSequence);
-// REGISTER_FACTORY(v5, Loop);
 // REGISTER_FACTORY(v5, RNNSequence);

 // ------------------------------ Supported v6 ops ------------------------------ //
 REGISTER_FACTORY(v6, CTCGreedyDecoderSeqLen);
 REGISTER_FACTORY(v6, MVN);
+REGISTER_FACTORY(v6, GatherElements);

 // ------------------------------ Supported v7 ops ------------------------------ //
 REGISTER_FACTORY(v7, Gather);
diff --git a/inference-engine/src/cldnn_engine/ops/gather_elements.cpp b/inference-engine/src/cldnn_engine/ops/gather_elements.cpp
new file mode 100644
index 00000000000000..d61382807506c1
--- /dev/null
+++ b/inference-engine/src/cldnn_engine/ops/gather_elements.cpp
@@ -0,0 +1,66 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "cldnn_program.h"
+#include "cldnn_common_utils.h"
+
+#include "ngraph/op/gather_elements.hpp"
+#include "ngraph/op/constant.hpp"
+
+#include "cldnn/primitives/gather_elements.hpp"
+
+namespace CLDNNPlugin {
+
+static cldnn::gather_elements::gather_elements_axis GetGatherAxis(int axis, unsigned rank) {
+    if (axis < 0)
+        axis += rank;
+    if (axis < 0 || axis >= rank)
+        IE_THROW() << "GatherElements axis does not correspond to the number of dimensions";
+
+    // Difference in dimension ordering between IE and clDNN,
+    // reverse spatial dimensions after batch and feature.
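// ----------------------------------------------------------------------------
// Illustration, a standalone sketch of the remapping implemented below: IE
// orders dimensions b, f, y, x (plus z, w for rank 5/6) while clDNN enumerates
// spatial axes innermost-first, so e.g. rank 4 / axis 2 maps to along_y.
#include <algorithm>
#include <cstdio>

static unsigned remap(unsigned axis, unsigned rank) {
    if (axis < 2)
        return axis;                               // batch/feature stay in place
    const unsigned spatial_size = std::max(rank, 4u) - 2;
    return spatial_size - (axis - 2) - 1 + 2;      // reverse the spatial order
}

int main() {
    std::printf("%u %u %u\n", remap(2, 4), remap(3, 4), remap(2, 5));  // 3 2 4
    return 0;
}
// ----------------------------------------------------------------------------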
+ unsigned cldnn_axis = axis; + if (axis >= 2) { + auto spatial_axis = axis - 2; + // Default and minimum number of dimensions is 4 + auto spatial_size = std::max(rank, 4u) - 2; + cldnn_axis = spatial_size - spatial_axis - 1 + 2; + } + + switch (cldnn_axis) { + case 0: return cldnn::gather_elements::gather_elements_axis::along_b; + case 1: return cldnn::gather_elements::gather_elements_axis::along_f; + case 2: return cldnn::gather_elements::gather_elements_axis::along_x; + case 3: return cldnn::gather_elements::gather_elements_axis::along_y; + case 4: return cldnn::gather_elements::gather_elements_axis::along_z; + case 5: return cldnn::gather_elements::gather_elements_axis::along_w; + default: IE_THROW() << "Unsupported GatherElements axis: " << axis; + } + return cldnn::gather_elements::gather_elements_axis::along_f; // shouldn't get here +} + +void CreateGatherElementsOp(Program& p, const std::shared_ptr& op) { + p.ValidateInputs(op, {2}); + auto inputPrimitives = p.GetInputPrimitiveIDs(op); + std::string layerName = layer_type_name_ID(op); + + size_t rank = op->get_input_shape(0).size(); + int32_t axis = static_cast(op->get_axis()); + + auto outLayout = DefaultFormatForDims(op->get_output_shape(0).size()); + + auto primitive = cldnn::gather_elements(layerName, + inputPrimitives[0], + inputPrimitives[1], + outLayout, + CldnnTensorFromIEDims(op->get_output_shape(0)), + GetGatherAxis(axis, rank)); + + p.AddPrimitive(primitive); + p.AddPrimitiveToProfiler(op); +} + +REGISTER_FACTORY_IMPL(v6, GatherElements); + +} // namespace CLDNNPlugin diff --git a/inference-engine/src/cldnn_engine/ops/loop.cpp b/inference-engine/src/cldnn_engine/ops/loop.cpp new file mode 100644 index 00000000000000..1ac452265b8820 --- /dev/null +++ b/inference-engine/src/cldnn_engine/ops/loop.cpp @@ -0,0 +1,227 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +/////////////////////////////////////////////////////////////////////////////////////////////////// +#include "cldnn_program.h" +#include "cldnn_common_utils.h" +#include "cldnn_engine.h" + +#include + +#include "ngraph/op/loop.hpp" +#include "ngraph/op/constant.hpp" +#include "ngraph/op/util/sub_graph_base.hpp" +#include "transformations/utils/utils.hpp" +#include "ie_ngraph_utils.hpp" + +#include "cldnn/primitives/loop.hpp" +#include "cldnn/primitives/mutable_data.hpp" +#include "cldnn/primitives/data.hpp" +#include "cldnn/primitives/reorder.hpp" +#include "cldnn/graph/topology.hpp" + +#include +#include + +using Loop = ngraph::op::v5::Loop; + +namespace CLDNNPlugin { + +template +static DATA_TYPE CreateScalarData(Program &p, const cldnn::primitive_id& id, int64_t num) { + auto mem = p.GetEngine().allocate_memory({ cldnn::data_types::i64, cldnn::format::bfyx, { 1, 1, 1, 1 } }); + cldnn::mem_lock ptr{mem, p.GetEngine().get_program_stream()}; + *ptr.begin() = num; + return {id, mem}; +} + +static cldnn::mutable_data CreateAdditionalOutputData(Program &p, const std::shared_ptr& op, + const cldnn::primitive_id& id, const cldnn::primitive_id& input, + const int32_t output_idx) { + const auto precision = DataTypeFromPrecision(op->get_output_element_type(output_idx)); + const auto format = DefaultFormatForDims(op->get_output_shape(output_idx).size()); + const auto tensor = CldnnTensorFromIEDims(op->get_output_shape(output_idx)); + cldnn::layout output_layout = cldnn::layout(precision, format, tensor); + auto mem = p.GetEngine().allocate_memory(output_layout); + auto md = cldnn::mutable_data(id, {input}, mem); // cldnn::data 
cannot set dependency + return md; +} + +static void UpdateBackedge(std::vector& back_edges, + const cldnn::primitive_id& old_primitive_id, const cldnn::primitive_id& new_primitive_id) { + for (auto& back_edge : back_edges) { + if (back_edge.from == old_primitive_id) { + back_edge.from = new_primitive_id; + } + } +} + +static std::string GetExternalInputName(const int64_t body_parameter_index, + const std::shared_ptr& op) { + const auto& loop_input_descs = op->get_input_descriptions(); + for (const auto& loop_input_desc : loop_input_descs) { + if (loop_input_desc->m_body_parameter_index == body_parameter_index) { + auto external_node = op->get_input_node_shared_ptr(loop_input_desc->m_input_index); + return layer_type_name_ID(external_node); + } + } + return {""}; +} + +void CreateLoopOp(Program& p, const std::shared_ptr& op) { + const std::string layerName = layer_type_name_ID(op); + auto inputPrimitives = p.GetInputPrimitiveIDs(op); + const auto& loop_input_descs = op->get_input_descriptions(); + const auto& loop_output_descs = op->get_output_descriptions(); + const auto& body_inputs = op->get_function()->get_parameters(); + const auto& body_outputs = op->get_function()->get_results(); + + InferenceEngine::CNNNetwork body_network(op->get_function()); + auto networkInputs = body_network.getInputsInfo(); + auto networkOutputs = body_network.getOutputsInfo(); + + // Set special body ports: current_iteration input , execution condition output + auto special_body_ports = op->get_special_body_ports(); + + std::string body_current_iteration_id; + if (special_body_ports.current_iteration_input_idx >= 0) { + auto current_iteration_input = body_inputs.at(special_body_ports.current_iteration_input_idx); + body_current_iteration_id = layer_type_name_ID(current_iteration_input); + std::string input_name = ngraph::op::util::create_ie_output_name(current_iteration_input); + const auto networkInput = networkInputs.at(input_name); + auto precision = InferenceEngine::details::convertPrecision(current_iteration_input->get_element_type()); + networkInput->setPrecision(precision); + } + + cldnn::primitive_id body_execution_condition_id; + if (special_body_ports.body_condition_output_idx >= 0) { + auto body_condition_output = body_outputs.at(special_body_ports.body_condition_output_idx)->get_input_node_shared_ptr(0); + body_execution_condition_id = layer_type_name_ID(body_condition_output); + std::string output_name = ngraph::op::util::create_ie_output_name(body_condition_output); + const auto networkOutput = networkOutputs.at(output_name); + networkOutput->setPrecision(InferenceEngine::Precision::I64); + } + + // get body topology from ngraph function + Program body_program(body_network, p.GetEnginePtr(), p.GetConfig(), true); + auto body_topology = *body_program.GetTopology(); + + // setup input_primitive_maps/ output_primitive_maps and back_edges + std::vector input_primitive_maps; + std::vector output_primitive_maps; + std::vector back_edges; + + // set input mapping & back edges + for (const auto& loop_input_desc : loop_input_descs) { + const cldnn::primitive_id& external_id = inputPrimitives.at(loop_input_desc->m_input_index); + auto& body_input = body_inputs.at(loop_input_desc->m_body_parameter_index); + cldnn::primitive_id internal_id = layer_type_name_ID(body_input); + + // set input mapping + if (const auto& sliceInfo = + std::dynamic_pointer_cast(loop_input_desc)) { + // sliced input + input_primitive_maps.emplace_back(external_id, internal_id, sliceInfo->m_axis, + sliceInfo->m_start, 
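// ----------------------------------------------------------------------------
// For context, a sketch of the nGraph (opset5) calls that produce the input/
// output descriptions consumed here; the variable names are hypothetical and
// the exact signatures should be checked against ngraph/op/loop.hpp:
//
//   auto loop = std::make_shared<opset5::Loop>(trip_count, exec_cond);
//   loop->set_function(body);
//   loop->set_special_body_ports({-1, cond_result_idx});
//   loop->set_sliced_input(body_param, data, 0, 1, 1, -1, 0);    // -> SliceInputDescription
//   loop->set_merged_input(state_param, init_value, state_out);  // -> MergedInputDescription (back edge)
//   auto last = loop->get_iter_value(body_result, -1);           // -> BodyOutputDescription
//   auto all = loop->get_concatenated_slices(body_result, 0, 1, 1, -1, 0);  // -> ConcatOutputDescription
// ----------------------------------------------------------------------------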
sliceInfo->m_end, sliceInfo->m_stride); + } else { + // input without slicing + input_primitive_maps.emplace_back(external_id, internal_id); + } + + // set back edges + if (const auto& mergedInput = + std::dynamic_pointer_cast(loop_input_desc)) { + // backedge + const auto& to = body_inputs.at(mergedInput->m_body_parameter_index); + const auto& from = body_outputs.at(mergedInput->m_body_value_index); + + cldnn::primitive_id to_id = layer_type_name_ID(to); + cldnn::primitive_id from_id = layer_type_name_ID(from); + + // reset output data type because the data types of the outputs of the + // body topology are always FP32 regardless of ngraph data type + { + const auto from_prim = body_topology.at(from_id); + const auto& to_ngraph_type = to->get_element_type(); + const auto to_cldnn_type = DataTypeFromPrecision(to_ngraph_type); + from_prim->output_data_type = to_cldnn_type; + } + back_edges.emplace_back(from_id, to_id); + } + } + + // set trip count, initial execution condition, num iteration primitives + // they should be mutable_data to prevent from being optimized out + const cldnn::primitive_id trip_count_id = layer_type_name_ID(op->get_input_node_shared_ptr(0)); + const cldnn::primitive_id execution_condition_id = layer_type_name_ID(op->get_input_node_shared_ptr(1)); + const int64_t num_iterations = op->get_num_iterations(); + if (num_iterations < 0) { + IE_THROW() << "loop's num_iteration cannot be negative"; + } + const cldnn::primitive_id num_iteration_id = layerName + "_numIteration"; + { + cldnn::mutable_data num_iteration = CreateScalarData(p, num_iteration_id, 0); + p.primitivesToIRLayersMap[num_iteration_id] = { op->get_friendly_name() }; + p.primitiveIDs[num_iteration_id] = num_iteration_id; + p.AddPrimitive(num_iteration); + p.AddInnerPrimitiveToProfiler(num_iteration_id, layerName, op); + } + + // set output mapping + for (const auto& loop_output_desc : loop_output_descs) { + const uint64_t output_idx = loop_output_desc->m_output_index; + + // Add additional mutable_data for multiple outputs + // primitive ID should be . if output_idx > 0 + // otherwise primitive ID should be equals to TI primitive ID + const std::string layerNameWithIndex = layerName + "." 
+ std::to_string(output_idx); + std::string external_id; + if (output_idx > 0) { + cldnn::mutable_data output_data = CreateAdditionalOutputData(p, op, layerNameWithIndex, layerName, output_idx); + p.AddPrimitive(output_data); + p.AddInnerPrimitiveToProfiler(layerNameWithIndex, layerName, op); + p.primitiveIDs[layerNameWithIndex] = layerNameWithIndex; + external_id = layerNameWithIndex; + } else { + p.primitiveIDs[layerNameWithIndex] = layerName; + p.primitiveIDs[layerName] = layerName; + external_id = layerName; + } + const auto& body_output = body_outputs.at(loop_output_desc->m_body_value_index); + cldnn::primitive_id internal_id = layer_type_name_ID(body_output); + + // update primitive_map + if (const auto& concatOutput = + std::dynamic_pointer_cast(loop_output_desc)) { + // output which requires concatenation + output_primitive_maps.emplace_back(external_id, internal_id, concatOutput->m_axis, + concatOutput->m_start, concatOutput->m_end, concatOutput->m_stride); + } + if (std::dynamic_pointer_cast(loop_output_desc)) { + // output which requires no concatenation + output_primitive_maps.emplace_back(external_id, internal_id); + } + } + + const cldnn::loop loopPrimitive( + layerName, /* layer name of this primitive (output id) */ + inputPrimitives, /* inputs of this layer */ + body_topology, /* body network */ + trip_count_id, /* trip_count data in outer network, always same as num_iterations in TI */ + execution_condition_id, /* initial_execution_condition data in outer network, always true in TI */ + num_iteration_id, /* actual number of iteration data in body network */ + input_primitive_maps, /* input mappings connecting outer network and inner network */ + output_primitive_maps, /* output mappings connecting outer network and inner network */ + back_edges, /* back edge mapping */ + num_iterations, /* max iteration, i.e. 
length of iteration axis */ + body_current_iteration_id, + body_execution_condition_id); + + p.AddPrimitive(loopPrimitive); + p.AddPrimitiveToProfiler(op); +} + +REGISTER_FACTORY_IMPL(v5, Loop); + +} // namespace CLDNNPlugin diff --git a/inference-engine/src/gna_plugin/backend/dnn_types.h b/inference-engine/src/gna_plugin/backend/dnn_types.h index d08d9346d35c89..0b00b41ec830d7 100644 --- a/inference-engine/src/gna_plugin/backend/dnn_types.h +++ b/inference-engine/src/gna_plugin/backend/dnn_types.h @@ -227,7 +227,7 @@ OvGnaType OvGnaTypeIntFromBytes(T bytesPerElement) { return r->second; } -static std::string OvGnaTypeToString(OvGnaType type) { +inline std::string OvGnaTypeToString(OvGnaType type) { static const std::map typeToString = { {OvGnaTypeInt8, "OvGnaTypeInt8"}, {OvGnaTypeInt16, "OvGnaTypeInt16"}, @@ -241,7 +241,7 @@ static std::string OvGnaTypeToString(OvGnaType type) { return r->second; } -static std::string OvGnaModeToString(OvGnaMode mode) { +inline std::string OvGnaModeToString(OvGnaMode mode) { static const std::map modeToString = { {OvGnaModeDefault, "OvGnaModeDefault"}, {OvGnaModeDisabled, "OvGnaModeDisabled"}, diff --git a/inference-engine/src/gna_plugin/backend/gna_limitations.hpp b/inference-engine/src/gna_plugin/backend/gna_limitations.hpp index 90af04519291a6..41a8178ea4e017 100644 --- a/inference-engine/src/gna_plugin/backend/gna_limitations.hpp +++ b/inference-engine/src/gna_plugin/backend/gna_limitations.hpp @@ -87,6 +87,8 @@ class Validator { static void ThrowIfNotEmpty(const std::string prefix, const std::string error); public: + Validator() = default; + void ValidateCnn2D(std::string name, const uint32_t inHeight, const uint32_t inWidth, const uint32_t inChannels, const uint32_t kH, const uint32_t kW, const uint32_t kN, const uint32_t strideH, const uint32_t strideW, OvGnaType inPrecision) const; diff --git a/inference-engine/src/gna_plugin/gna_graph_patterns.hpp b/inference-engine/src/gna_plugin/gna_graph_patterns.hpp index eed44b0ef35324..4c4ceb85d14802 100644 --- a/inference-engine/src/gna_plugin/gna_graph_patterns.hpp +++ b/inference-engine/src/gna_plugin/gna_graph_patterns.hpp @@ -65,9 +65,11 @@ inline std::pair Fin if (parent->outData.size() != 1 || InferenceEngine::getInputTo(parent->outData[0]).size() != 1) { return std::make_pair(nullptr, nullptr); } - auto parent_dims = parent->outData[0]->getDims(); - // Check if the previous layer has all dimensions except one to be equal to 1 - if (std::count_if(std::begin(parent_dims), std::end(parent_dims), [](size_t dim) { return dim != 1; }) > 1) { + // Check if reshape is expected for this pattern: + // the previous layer has number of channels > 1 and one of height/width dimensions is also > 1 + if (GetDataDimSize(parent->outData[0], InferenceEngine::DataDimName::C) != 1 && + (GetDataDimSize(parent->outData[0], InferenceEngine::DataDimName::H) != 1 || + GetDataDimSize(parent->outData[0], InferenceEngine::DataDimName::W) != 1)) { return std::make_pair(nullptr, nullptr); } } diff --git a/inference-engine/src/gna_plugin/gna_plugin.cpp b/inference-engine/src/gna_plugin/gna_plugin.cpp index c40b97209e1075..3c91f18dc3bd7f 100644 --- a/inference-engine/src/gna_plugin/gna_plugin.cpp +++ b/inference-engine/src/gna_plugin/gna_plugin.cpp @@ -470,7 +470,6 @@ void GNAPlugin::UpdateInputScaleFromNetwork(InferenceEngine::CNNNetwork & networ auto data = input.second->getInputData(); for (auto && nextToInputLayer : getInputTo(data)) { if (!LayerInfo(nextToInputLayer.second).isFakeQuantize()) { - inputIdx++; continue; } // 
replacing scale factor from this fq layer @@ -493,6 +492,9 @@ void GNAPlugin::UpdateInputScaleFromNetwork(InferenceEngine::CNNNetwork & networ scaleInput = (fqLayer.getLevels() - 1) / (2 * maxAbsVal); } + IE_ASSERT(config.inputScaleFactors.size() > inputIdx); + IE_ASSERT(inputsDesc->inputScaleFactors.size() > inputIdx); + if (!config.inputScaleFactors.empty()) { gnalog() << "Scale factor calculated during model quantization (" << scaleInput << ") will be used instead of user input (" << inputsDesc->inputScaleFactors[inputIdx] << ").\n"; @@ -505,9 +507,9 @@ void GNAPlugin::UpdateInputScaleFromNetwork(InferenceEngine::CNNNetwork & networ config.inputScaleFactors[inputIdx] = scaleInput; inputsDesc->inputScaleFactors[inputIdx] = scaleInput; - - inputIdx++; } + + inputIdx++; } } @@ -752,12 +754,14 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) { passes->registerPass(); passes->registerPass(); + passes->registerPass(); + passes->registerPass(); + passes->registerPass(); passes->registerPass(); passes->registerPass(); - passes->registerPass(); passes->registerPass(); passes->registerPass(); passes->registerPass(); @@ -775,7 +779,6 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) { #if GNA_LIB_VER == 2 passes->registerPass(); #endif - passes->registerPass(); passes->registerPass(); passIdx = passes->run(passIdx); }; diff --git a/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp b/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp index ae731465025e05..f4e5fc7a9316d9 100644 --- a/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp +++ b/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp @@ -1530,16 +1530,7 @@ void SubstituteScaleShiftBroadCastPass::run() { continue; } - // only 3d scaleshift supported where number of c is arbitrary - auto lastD = reshape_batch ? 
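// ----------------------------------------------------------------------------
// Illustration of the scale-factor formula used above, scaleInput =
// (levels - 1) / (2 * maxAbsVal), assuming a symmetric FakeQuantize input
// range: for levels = 256 and range [-5.12, 5.12] the 255 quantization steps
// are spread over a span of 10.24, giving a scale of roughly 24.9.
#include <cstdio>

int main() {
    const double levels = 256, maxAbsVal = 5.12;
    std::printf("%.4f\n", (levels - 1) / (2 * maxAbsVal));  // 24.9023
    return 0;
}
// ----------------------------------------------------------------------------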
dataDims[1] : dataDims.back(); - if (lastD != weightsElements) { - THROW_GNA_EXCEPTION << "Unsupported layer: " << l->name - << " should have last dim(" << lastD << ") equal to weights(" << weightsElements << ") length"; - } - if (dataDims.size() == 2) { - THROW_GNA_EXCEPTION << "For layer: " << l->name - << " weights size(" << weightsElements<< ") invalid: should match input size of(" << lastD << ")"; - } + // TODO: add broadcasting rules checks gnalog() << "Substitution ScaleShift broadcast for layer: " << l->name << "\n"; if (nElements % scaleShift->_weights->size()) { @@ -2186,7 +2177,7 @@ void MoveFakeQuantizeLayerIntoQuantParamsPass :: run() { } if (isFQFuseAllowed) { - getInputTo(prevData).clear(); + getInputTo(prevData).erase(l->name); } // Connect all next layers after FQ to the layer that is before FQ @@ -2220,6 +2211,17 @@ void TransposeWeightsFromNCHWToNHWCPass::run() { } }; + auto transpInfoMatchWeightsSize = [](const std::vector<TranspositionInfo> &transpositionInfo, size_t weightsSize, const std::string &layerName) { + size_t totalElements = 0; + for (auto && transpositionInfoPart : transpositionInfo) { + totalElements += transpositionInfoPart.num_transpose_rows * transpositionInfoPart.num_transpose_columns; + } + if (totalElements != weightsSize) { + THROW_GNA_EXCEPTION << layerName << " weights elements from transposition info (" << totalElements + << ") don't match input dimensions (" << weightsSize << ")"; + } + }; + for (auto &&l : *pLayers) { if (LayerInfo(l).isScaleShift()) { std::vector<TranspositionInfo> transpositionInfo; @@ -2237,6 +2239,10 @@ void TransposeWeightsFromNCHWToNHWCPass::run() { } auto weightable = dynamic_cast<WeightableLayer*>(l.get()); IE_ASSERT(weightable != nullptr); + + size_t totalWeights = weightable->_weights->size(); + transpInfoMatchWeightsSize(transpositionInfo, totalWeights, l->name); + ConvertTensorFromNCHWToNHWC(weightable->precision.size(), 1, weightable->_weights->size(), weightable->_weights->cbuffer().as<uint8_t*>(), true, transpositionInfo); if (weightable->_biases) { @@ -2270,14 +2276,9 @@ void TransposeWeightsFromNCHWToNHWCPass::run() { // If we found a split it's not possible to rotate data THROW_GNA_EXCEPTION << l->name << " won't be transposed due to a split before it"; } - size_t totalColumns = 0; - for (auto && transpositionInfoPart : transpositionInfo) { - totalColumns += transpositionInfoPart.num_transpose_rows * transpositionInfoPart.num_transpose_columns; - } - if (weightsColumns != totalColumns) { - THROW_GNA_EXCEPTION << l->name << " weights columns from transposition info (" << totalColumns - << ") don't match input dimensions (" << weightsColumns << ")"; - } + + transpInfoMatchWeightsSize(transpositionInfo, weightsColumns, l->name); + ConvertTensorFromNCHWToNHWC(precision, weightsRows, weightsColumns, weightable->_weights->cbuffer().as<uint8_t*>(), true, transpositionInfo); gnalog() << l->name << " weights rows transposition info:\n"; @@ -2297,14 +2298,9 @@ void TransposeWeightsFromNCHWToNHWCPass::run() { // If we found a concat it's not possible to rotate data THROW_GNA_EXCEPTION << l->name << " won't be transposed due to a concat after it"; } - size_t totalRows = 0; - for (const auto& transpositionInfoPart : transpositionInfo) { - totalRows += transpositionInfoPart.num_transpose_rows * transpositionInfoPart.num_transpose_columns; - } - if (weightsRows != totalRows) { - THROW_GNA_EXCEPTION << l->name << " weights rows from transposition info (" << totalRows - << ") don't match output dimensions (" << weightsRows << ")"; - } + + transpInfoMatchWeightsSize(transpositionInfo, weightsRows,
l->name); + ConvertTensorFromNCHWToNHWC(precision, weightsRows, weightsColumns, weightable->_weights->cbuffer().as<uint8_t*>(), false, transpositionInfo); gnalog() << l->name << " weights columns transposition info:\n"; diff --git a/inference-engine/src/hetero_plugin/hetero_infer_request.cpp b/inference-engine/src/hetero_plugin/hetero_infer_request.cpp index 7171363e7830f2..2b8d2f4f261667 100644 --- a/inference-engine/src/hetero_plugin/hetero_infer_request.cpp +++ b/inference-engine/src/hetero_plugin/hetero_infer_request.cpp @@ -77,7 +77,7 @@ void HeteroInferRequest::SetBlob(const std::string& name, const InferenceEngine: if (findInputAndOutputBlobByName(name, foundInput, foundOutput)) { r->SetBlob(name, data, foundInput->getPreProcess()); } - } catch (const InferenceEngine::NotFound& ex) {} + } catch (const InferenceEngine::NotFound&) {} } } diff --git a/inference-engine/src/inference_engine/CMakeLists.txt b/inference-engine/src/inference_engine/CMakeLists.txt index aeb0386e85c878..e31f7c3bf5aba2 100644 --- a/inference-engine/src/inference_engine/CMakeLists.txt +++ b/inference-engine/src/inference_engine/CMakeLists.txt @@ -120,10 +120,12 @@ ie_faster_build(${TARGET_NAME}_obj ) target_compile_definitions(${TARGET_NAME}_obj PRIVATE IMPLEMENT_INFERENCE_ENGINE_API - $) + $ + $) target_include_directories(${TARGET_NAME}_obj SYSTEM PRIVATE $ $ + $ $) target_include_directories(${TARGET_NAME}_obj PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}" @@ -160,7 +162,7 @@ if (TBBBIND_2_4_FOUND) endif() target_link_libraries(${TARGET_NAME} PRIVATE pugixml::static openvino::itt ${CMAKE_DL_LIBS} Threads::Threads - ngraph inference_engine_transformations) + ngraph ngraph::frontend_manager::static inference_engine_transformations) target_include_directories(${TARGET_NAME} INTERFACE $ @@ -200,7 +202,7 @@ if(WIN32) set_target_properties(${TARGET_NAME}_s PROPERTIES COMPILE_PDB_NAME ${TARGET_NAME}_s) endif() -target_link_libraries(${TARGET_NAME}_s PRIVATE openvino::itt ${CMAKE_DL_LIBS} ngraph +target_link_libraries(${TARGET_NAME}_s PRIVATE openvino::itt ${CMAKE_DL_LIBS} ngraph ngraph::frontend_manager::static inference_engine_transformations pugixml::static) target_compile_definitions(${TARGET_NAME}_s PUBLIC USE_STATIC_IE) @@ -227,7 +229,14 @@ list(APPEND core_components ngraph) list(APPEND PATH_VARS "IE_INCLUDE_DIR" "IE_NGRAPH_DIR" "IE_PARALLEL_CMAKE") -if((THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO") AND TBBROOT MATCHES ${TEMP}) +# define variables for InferenceEngineConfig.cmake +if(THREADING MATCHES "^(TBB|TBB_AUTO)$") + set(IE_TBB_DIR "${TBB_DIR}") + list(APPEND PATH_VARS "IE_TBB_DIR") +endif() + +# install only downloaded TBB, system one is not installed +if(THREADING MATCHES "^(TBB|TBB_AUTO)$" AND TBBROOT MATCHES ${TEMP}) ie_cpack_add_component(tbb REQUIRED) list(APPEND core_components tbb) @@ -247,8 +256,6 @@ if((THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO") AND TBBROOT MATCH COMPONENT tbb) set(IE_TBB_DIR_INSTALL "external/tbb/cmake") - set(IE_TBB_DIR "${TBB_DIR}") - list(APPEND PATH_VARS "IE_TBB_DIR") install(FILES "${TBB}/cmake/TBBConfig.cmake" "${TBB}/cmake/TBBConfigVersion.cmake" diff --git a/inference-engine/src/inference_engine/cnn_network_ngraph_impl.cpp b/inference-engine/src/inference_engine/cnn_network_ngraph_impl.cpp index 1f05ca0098c3da..ba73bcc8917437 100644 --- a/inference-engine/src/inference_engine/cnn_network_ngraph_impl.cpp +++ b/inference-engine/src/inference_engine/cnn_network_ngraph_impl.cpp @@ -364,13 +364,10 @@ CNNNetworkNGraphImpl::reshape(const std::map& bool
parameter_replaced = false; for (size_t i = 0; i < params.size(); i++) { - const auto& param = params[i]; + auto& param = params[i]; if (inputShapes.find(param->get_friendly_name()) == inputShapes.end()) continue; - ::ngraph::PartialShape shape(inputShapes.at(param->get_friendly_name())); - auto newParam = std::make_shared<::ngraph::op::Parameter>(param->get_element_type(), shape); - newParam->set_friendly_name(param->get_friendly_name()); - _ngraph_function->replace_parameter(i, newParam); + param->set_partial_shape(inputShapes.at(param->get_friendly_name())); parameter_replaced = true; } if (parameter_replaced) diff --git a/inference-engine/src/inference_engine/cpp/ie_infer_request.cpp b/inference-engine/src/inference_engine/cpp/ie_infer_request.cpp index 9e68666b7a36f6..f94a3b6ba1c162 100644 --- a/inference-engine/src/inference_engine/cpp/ie_infer_request.cpp +++ b/inference-engine/src/inference_engine/cpp/ie_infer_request.cpp @@ -127,7 +127,7 @@ void InferRequest::SetCompletionCallbackImpl(std::function { plugin.ImportNetwork(networkStream, config); networkIsImported = true; }); - } catch (const HeaderException& ex) { + } catch (const HeaderException&) { // For these exceptions just remove old cache and set that import didn't work cacheManager->removeCacheEntry(blobId); networkIsImported = false; diff --git a/inference-engine/src/inference_engine/ie_network_reader.cpp b/inference-engine/src/inference_engine/ie_network_reader.cpp index 6043303712dc02..7189a0a098aaa9 100644 --- a/inference-engine/src/inference_engine/ie_network_reader.cpp +++ b/inference-engine/src/inference_engine/ie_network_reader.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -226,6 +227,26 @@ CNNNetwork details::ReadNetwork(const std::string& modelPath, const std::string& return reader->read(modelStream, exts); } } + // Try to load with FrontEndManager + static ngraph::frontend::FrontEndManager manager; + ngraph::frontend::FrontEnd::Ptr FE; + ngraph::frontend::InputModel::Ptr inputModel; + if (!binPath.empty()) { +#if defined(ENABLE_UNICODE_PATH_SUPPORT) && defined(_WIN32) + std::wstring weights_path = FileUtils::multiByteCharToWString(binPath.c_str()); +#else + std::string weights_path = binPath; +#endif + FE = manager.load_by_model(model_path, weights_path); + if (FE) inputModel = FE->load(model_path, weights_path); + } else { + FE = manager.load_by_model(model_path); + if (FE) inputModel = FE->load(model_path); + } + if (inputModel) { + auto ngFunc = FE->convert(inputModel); + return CNNNetwork(ngFunc); + } IE_THROW() << "Unknown model format! Cannot find reader for model format: " << fileExt << " and read the model: " << modelPath << ". Please check that reader library exists in your PATH."; } @@ -248,4 +269,4 @@ CNNNetwork details::ReadNetwork(const std::string& model, const Blob::CPtr& weig IE_THROW() << "Unknown model format! Cannot find reader for the model and read it. 
Please check that reader library exists in your PATH."; } -} // namespace InferenceEngine \ No newline at end of file +} // namespace InferenceEngine diff --git a/inference-engine/src/legacy_api/include/legacy/ie_layers.h b/inference-engine/src/legacy_api/include/legacy/ie_layers.h index e7ea32467a0654..64fa501966a97e 100644 --- a/inference-engine/src/legacy_api/include/legacy/ie_layers.h +++ b/inference-engine/src/legacy_api/include/legacy/ie_layers.h @@ -1025,7 +1025,8 @@ class INFERENCE_ENGINE_INTERNAL_CNNLAYER_CLASS(EltwiseLayer): public CNNLayer { Logical_OR, Logical_XOR, Logical_NOT, - Mean + Mean, + Abs, }; /** diff --git a/inference-engine/src/low_precision_transformations/CMakeLists.txt b/inference-engine/src/low_precision_transformations/CMakeLists.txt index c6306dbc08f067..7f9d34e7149c88 100644 --- a/inference-engine/src/low_precision_transformations/CMakeLists.txt +++ b/inference-engine/src/low_precision_transformations/CMakeLists.txt @@ -28,8 +28,6 @@ ie_faster_build(${TARGET_NAME} ie_add_vs_version_file(NAME ${TARGET_NAME} FILEDESCRIPTION "Inference Engine LP transformations library") -target_compile_definitions(${TARGET_NAME} PRIVATE inference_engine_transformations_EXPORTS) - target_link_libraries(${TARGET_NAME} PUBLIC inference_engine_transformations PRIVATE openvino::itt) diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/add.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/add.hpp index fa64037797a384..92caba9f382a5f 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/add.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/add.hpp @@ -11,12 +11,11 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API AddTransformation : public EltwiseBaseTransformation { +class LP_TRANSFORMATIONS_API AddTransformation : public EltwiseBaseTransformation { public: - AddTransformation(const Params& params) : EltwiseBaseTransformation(params) {} - ~AddTransformation() override {} - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; - bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override; + NGRAPH_RTTI_DECLARATION; + AddTransformation(const Params& params = Params()); + bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override; bool canBeTransformed(const TransformationContext& context, std::shared_ptr layer) const override; }; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/align_quantization_intervals.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/align_quantization_intervals.hpp new file mode 100644 index 00000000000000..4293be82f15d23 --- /dev/null +++ b/inference-engine/src/low_precision_transformations/include/low_precision/align_quantization_intervals.hpp @@ -0,0 +1,25 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include "low_precision/lpt_visibility.hpp" + +namespace ngraph { +namespace pass { +namespace low_precision { + +class LP_TRANSFORMATIONS_API AlignQuantizationIntervals; + +} // namespace low_precision +} // namespace pass +} // namespace ngraph + +class ngraph::pass::low_precision::AlignQuantizationIntervals : public ngraph::pass::FunctionPass { +public: + NGRAPH_RTTI_DECLARATION; + bool run_on_function(std::shared_ptr f) override; +}; diff --git 
a/inference-engine/src/low_precision_transformations/include/low_precision/align_quantization_parameters.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/align_quantization_parameters.hpp new file mode 100644 index 00000000000000..fc7f7d30e7f876 --- /dev/null +++ b/inference-engine/src/low_precision_transformations/include/low_precision/align_quantization_parameters.hpp @@ -0,0 +1,26 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include +#include "low_precision/lpt_visibility.hpp" + +namespace ngraph { +namespace pass { +namespace low_precision { + +class LP_TRANSFORMATIONS_API AlignQuantizationParameters; + +} // namespace low_precision +} // namespace pass +} // namespace ngraph + +class ngraph::pass::low_precision::AlignQuantizationParameters : public ngraph::pass::FunctionPass { +public: + NGRAPH_RTTI_DECLARATION; + bool run_on_function(std::shared_ptr f) override; +}; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/avg_pool.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/avg_pool.hpp index 823c8990110904..2d37f030ae30a0 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/avg_pool.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/avg_pool.hpp @@ -11,11 +11,11 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API AvgPoolTransformation : public LayerTransformation { +class LP_TRANSFORMATIONS_API AvgPoolTransformation : public LayerTransformation { public: - AvgPoolTransformation(const Params& params); - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; - bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override; + NGRAPH_RTTI_DECLARATION; + AvgPoolTransformation(const Params& params = Params()); + bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override; bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; bool canBeTransformed(const TransformationContext& context, std::shared_ptr layer) const override; }; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/base_matcher_pass.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/base_matcher_pass.hpp new file mode 100644 index 00000000000000..4c637624e40f3d --- /dev/null +++ b/inference-engine/src/low_precision_transformations/include/low_precision/base_matcher_pass.hpp @@ -0,0 +1,24 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once +#include +#include +#include "rt_info/attribute_parameters.hpp" + +namespace ngraph { +namespace pass { +namespace low_precision { + +class LP_TRANSFORMATIONS_API BaseMatcherPass; + +} // namespace low_precision +} // namespace pass +} // namespace ngraph + +class LP_TRANSFORMATIONS_API ngraph::pass::low_precision::BaseMatcherPass : public ngraph::pass::MatcherPass { +public: + BaseMatcherPass(const AttributeParameters& params = AttributeParameters()); + AttributeParameters params; +}; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/clamp.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/clamp.hpp index 7698cf5b6da3ca..a3cf76a1284470 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/clamp.hpp +++ 
b/inference-engine/src/low_precision_transformations/include/low_precision/clamp.hpp @@ -12,11 +12,11 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API ClampTransformation : public LayerTransformation { +class LP_TRANSFORMATIONS_API ClampTransformation : public LayerTransformation { public: - ClampTransformation(const Params& params); - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; - bool transform(TransformationContext& context, ngraph::pattern::Matcher& m) const override; + NGRAPH_RTTI_DECLARATION; + ClampTransformation(const Params& params = Params()); + bool transform(TransformationContext& context, ngraph::pattern::Matcher& m) override; bool canBeTransformed(const TransformationContext& context, std::shared_ptr op) const override; bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; }; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/common/dequantization_op.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/common/dequantization_op.hpp index e2fdc58f1b7e18..46b739959d6c28 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/common/dequantization_op.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/common/dequantization_op.hpp @@ -13,7 +13,7 @@ #include #include -#include "transformations_visibility.hpp" +#include "low_precision/lpt_visibility.hpp" #include "transformations/rt_info/dequantization_attribute.hpp" namespace ngraph { @@ -21,7 +21,7 @@ namespace pass { namespace low_precision { // template -// class TRANSFORMATIONS_API DequantizationOp : public BaseOp2 { +// class LP_TRANSFORMATIONS_API DequantizationOp : public BaseOp2 { // public: // template // DequantizationOp(Args&&... args) : BaseOp2(std::forward(args)...) 
{ @@ -63,7 +63,7 @@ void copyRuntimeInfo(const ngraph::Node& from, ngraph::Node& to) { } // namespace -class TRANSFORMATIONS_API DequantizationConvert : public ngraph::opset1::Convert { +class LP_TRANSFORMATIONS_API DequantizationConvert : public ngraph::opset1::Convert { public: DequantizationConvert(const ngraph::Output<Node>& arg, const ngraph::element::Type& destination_type) : ngraph::opset1::Convert(arg, destination_type) { @@ -77,7 +77,7 @@ class TRANSFORMATIONS_API DequantizationConvert : public ngraph::opset1::Convert } }; -class TRANSFORMATIONS_API DequantizationSubtract : public ngraph::opset1::Subtract { +class LP_TRANSFORMATIONS_API DequantizationSubtract : public ngraph::opset1::Subtract { public: DequantizationSubtract( const ngraph::Output<Node>& arg0, @@ -94,7 +94,7 @@ class TRANSFORMATIONS_API DequantizationSubtract : public ngraph::opset1::Subtra } }; -class TRANSFORMATIONS_API DequantizationMultiply : public ngraph::opset1::Multiply { +class LP_TRANSFORMATIONS_API DequantizationMultiply : public ngraph::opset1::Multiply { public: DequantizationMultiply( const Output<Node>& arg0, @@ -116,7 +116,7 @@ class TRANSFORMATIONS_API DequantizationMultiply : public ngraph::opset1::Multip } }; -class TRANSFORMATIONS_API DequantizationAdd : public ngraph::opset1::Add { +class LP_TRANSFORMATIONS_API DequantizationAdd : public ngraph::opset1::Add { public: DequantizationAdd( const ngraph::Output<Node>& arg0, diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/common/fake_quantize_dequantization.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/common/fake_quantize_dequantization.hpp index 67c522bb7e3fcf..a9fba5234d1846 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/common/fake_quantize_dequantization.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/common/fake_quantize_dequantization.hpp @@ -8,6 +8,7 @@ #include #include #include +#include namespace ngraph { namespace pass { @@ -15,7 +16,7 @@ namespace low_precision { typedef std::tuple<std::shared_ptr<Node>, std::shared_ptr<Node>> FakeQuantizeDequantizationValues; -class FakeQuantizeDequantization { +class LP_TRANSFORMATIONS_API FakeQuantizeDequantization { public: FakeQuantizeDequantization(); diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/common/ie_lpt_exception.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/common/ie_lpt_exception.hpp index 1c4cd359f5114e..7057fc1f59764a 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/common/ie_lpt_exception.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/common/ie_lpt_exception.hpp @@ -7,7 +7,7 @@ #include #include #include -#include +#include /** * @def THROW_TRANSFORMATION_EXCEPTION_LPT */ @@ -19,7 +19,7 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API Exception : std::exception { +class LP_TRANSFORMATIONS_API Exception : std::exception { std::shared_ptr buffer; mutable std::string buffer_str; public: @@ -42,7 +42,7 @@ class TRANSFORMATIONS_API Exception : std::exception { #define THROW_TRANSFORMATION_EXCEPTION throw ::ngraph::pass::low_precision::Exception() << __FILE__ << ":" << __LINE__ << " " -class TRANSFORMATIONS_API InferenceEngineLptException : public Exception { +class LP_TRANSFORMATIONS_API InferenceEngineLptException : public Exception { public: InferenceEngineLptException(const std::string& filename, const size_t line,
const Node& node) { *this diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/common/operation_per_tensor_quantization_restriction.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/common/operation_per_tensor_quantization_restriction.hpp new file mode 100644 index 00000000000000..4c5321b26bef99 --- /dev/null +++ b/inference-engine/src/low_precision_transformations/include/low_precision/common/operation_per_tensor_quantization_restriction.hpp @@ -0,0 +1,56 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include +#include + +#include +#include + +namespace ngraph { +namespace pass { +namespace low_precision { + +class OperationPerTensorQuantizationRestriction { +public: + using RestrictedPorts = std::vector<size_t>; + + ngraph::Node::type_info_t operationType; + bool specifyVersion; + std::vector<size_t> restrictedPorts; + + OperationPerTensorQuantizationRestriction() = default; + OperationPerTensorQuantizationRestriction( + const ngraph::Node::type_info_t operationType, + const bool specifyVersion, + const RestrictedPorts& restrictedPorts) : + operationType(operationType), + specifyVersion(specifyVersion), + restrictedPorts(restrictedPorts) {} + + template <typename T> + static OperationPerTensorQuantizationRestriction create( + const RestrictedPorts& restrictedPorts = {}, + const bool specifyVersion = false) { + return OperationPerTensorQuantizationRestriction(T::get_type_info_static(), specifyVersion, restrictedPorts); + } + + template <typename T> + static RestrictedPorts getPrecisionsByOperationType(std::vector<OperationPerTensorQuantizationRestriction>& restrictions) { + for (const auto& restriction : restrictions) { + if (restriction.operationType == T::get_type_info_static()) { + return restriction.restrictedPorts; + } + } + return {}; + } +}; + +} // namespace low_precision +} // namespace pass +} // namespace ngraph diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/common/operation_precision_restriction.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/common/operation_precision_restriction.hpp new file mode 100644 index 00000000000000..d22252ee7afd88 --- /dev/null +++ b/inference-engine/src/low_precision_transformations/include/low_precision/common/operation_precision_restriction.hpp @@ -0,0 +1,59 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include + +#include +#include + +#include +#include + +namespace ngraph { +namespace pass { +namespace low_precision { + +class OperationPrecisionRestriction { +public: + using PrecisionsByPort = std::vector<std::pair<size_t, std::vector<ngraph::element::Type>>>; + + ngraph::Node::type_info_t operationType; + bool specifyVersion; + std::vector<std::pair<size_t, std::vector<ngraph::element::Type>>> precisionsByPort; + + OperationPrecisionRestriction() = default; + OperationPrecisionRestriction( + const ngraph::Node::type_info_t operationType, + const bool specifyVersion, + const PrecisionsByPort& precisionsByPort) : + operationType(operationType), + specifyVersion(specifyVersion), + precisionsByPort(precisionsByPort) {} + + template <typename T> + static OperationPrecisionRestriction create( + const PrecisionsByPort& precisionsByPort, + const bool specifyVersion = false) { + return OperationPrecisionRestriction(T::get_type_info_static(), specifyVersion, precisionsByPort); + } + + template <typename T> + static PrecisionsByPort getPrecisionsByOperationType(std::vector<OperationPrecisionRestriction>& restrictions) { + for (const auto& restriction : restrictions) { + if (restriction.operationType ==
T::get_type_info_static()) { + return restriction.precisionsByPort; + } + } + return {}; + } +}; + +} // namespace low_precision +} // namespace pass +} // namespace ngraph diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/common/subgraph.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/common/subgraph.hpp deleted file mode 100644 index 83e8cfc9cc955c..00000000000000 --- a/inference-engine/src/low_precision_transformations/include/low_precision/common/subgraph.hpp +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include -#include -#include -#include - -#include -#include -#include -#include "../ilayer_transformations_manager.hpp" - -namespace ngraph { -namespace pass { -namespace low_precision { - -class Subgraph { -public: - Subgraph(ngraph::pass::ILayerTransformationsManager* layerTransformationsManager); - - bool fillSubgraphForConcat(const std::shared_ptr& concat, std::unordered_set& handledLayers); - bool empty() const; - - std::vector> quantizationLayers; - std::vector> concatLayers; - std::unordered_map> layers; - -private: - bool atLeastOneIsIntermediate(const std::shared_ptr& node) const; - bool fillSubgraphForQuantization(const std::shared_ptr& fakeQuantize, std::unordered_set& handledLayers); - bool fillSubgraphForIntermediate(const std::shared_ptr& intermediate, std::unordered_set& handledLayers); - bool fill(const std::shared_ptr& concat, std::unordered_set& handledLayers); - const ngraph::pass::ILayerTransformationsManager* layerTransformationsManager; -}; - -} // namespace low_precision -} // namespace pass -} // namespace ngraph diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/concat.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/concat.hpp index e381fd5d0a0401..db16f572224293 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/concat.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/concat.hpp @@ -13,32 +13,21 @@ #include #include "layer_transformation.hpp" -#include "common/subgraph.hpp" #include "common/fake_quantize_dequantization.hpp" namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API ConcatTransformation : public LayerTransformation { +class LP_TRANSFORMATIONS_API ConcatTransformation : public LayerTransformation { public: - ConcatTransformation(const Params& params) : LayerTransformation(params) {} - ~ConcatTransformation() override {}; - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; - bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override; + NGRAPH_RTTI_DECLARATION; + ConcatTransformation(const Params& params = Params()); + bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override; bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; bool canBeTransformed(const TransformationContext& context, std::shared_ptr layer) const override; protected: - void addDequantizationLayers( - TransformationContext& context, - ngraph::pass::low_precision::Subgraph& subgraph, - std::function layer, - std::shared_ptr child, - const std::string originalLayerName, - std::vector& dequantizationsToConcatenate)> getLayerDequantizationCallback) const; - static bool isHandled( const TransformationContext& context, const std::vector>& 
quantizationOperations); @@ -51,14 +40,6 @@ class TRANSFORMATIONS_API ConcatTransformation : public LayerTransformation { NodeVector& multiplyNodes) const; std::shared_ptr concatenateDeqNodes(NodeVector& nodes) const; - -private: - size_t getMinQuantizationLevels( - const DataPrecision& dataPrecision, - const float maxOutputInterval, - const std::vector& quantizationLayersDetails, - const float outputLowValue, - const float outputHighValue) const; }; } // namespace low_precision diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/concat_multi_channels.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/concat_multi_channels.hpp deleted file mode 100644 index 48c0a0ef9eaa5f..00000000000000 --- a/inference-engine/src/low_precision_transformations/include/low_precision/concat_multi_channels.hpp +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include -#include -#include - -#include - -#include "concat.hpp" -#include "common/subgraph.hpp" -#include "common/fake_quantize_dequantization.hpp" - -namespace ngraph { -namespace pass { -namespace low_precision { - -class TRANSFORMATIONS_API ConcatMultiChannelsTransformation : public ConcatTransformation { -public: - ConcatMultiChannelsTransformation(const Params& params) : ConcatTransformation(params) {} - ~ConcatMultiChannelsTransformation() override {}; - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; - bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override; - bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; - -private: - // Go through the parent elements of the layer and fill dequantization collection - // with Dq operations that should be inserted before the layer. 
- void fillDequantization( - const std::shared_ptr layer, - const std::unordered_map& dequantizationByFakeQuantize, - std::vector& dequantization) const; - - FakeQuantizeDequantization getConcatenatedDequantization( - const std::shared_ptr concat, - const std::vector& dequantization) const; - - static FakeQuantizeDequantization getFoldedDequantization( - const std::shared_ptr operation, - const FakeQuantizeDequantization& dequantization, - const size_t sourceOutputIdx); - - bool isMultiChannel(const std::vector>& concatLayers) const noexcept; -}; - -} // namespace low_precision -} // namespace pass -} // namespace ngraph diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/convert.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/convert.hpp index ca860903420873..cf7299c9def383 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/convert.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/convert.hpp @@ -11,12 +11,11 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API ConvertTransformation : public LayerTransformation { +class LP_TRANSFORMATIONS_API ConvertTransformation : public LayerTransformation { public: - ConvertTransformation(const Params& params) : LayerTransformation(params) {} - ~ConvertTransformation() override {} - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; - bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override; + NGRAPH_RTTI_DECLARATION; + ConvertTransformation(const Params& params = Params()); + bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override; bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; }; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/convert_subtract_constant.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/convert_subtract_constant.hpp index ea2219df6e5863..f9584eb6842e60 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/convert_subtract_constant.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/convert_subtract_constant.hpp @@ -7,14 +7,14 @@ #include #include -#include +#include #include namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API ConvertSubtractConstant; +class LP_TRANSFORMATIONS_API ConvertSubtractConstant; } // namespace low_precision } // namespace pass diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/convolution.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/convolution.hpp index e3041a0b08f2c1..5542d04d70adb3 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/convolution.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/convolution.hpp @@ -11,12 +11,13 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API ConvolutionTransformation : public WeightableLayerTransformation { +class LP_TRANSFORMATIONS_API ConvolutionTransformation : public WeightableLayerTransformation { public: - ConvolutionTransformation(const Params& params); - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; - bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const 
override; - bool isQuantized(std::shared_ptr layer) const noexcept override; + NGRAPH_RTTI_DECLARATION; + ConvolutionTransformation(const Params& params = Params()); + bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override; + bool isQuantized(const std::shared_ptr& layer) const noexcept override; + static bool isQuantizedStatic(const std::shared_ptr& layer) noexcept; }; } // namespace low_precision diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/convolution_backprop_data.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/convolution_backprop_data.hpp index d6bbe504dc6eea..35b5d806be1a7b 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/convolution_backprop_data.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/convolution_backprop_data.hpp @@ -11,13 +11,13 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API ConvolutionBackpropDataTransformation : public WeightableLayerTransformation { +class LP_TRANSFORMATIONS_API ConvolutionBackpropDataTransformation : public WeightableLayerTransformation { public: - ConvolutionBackpropDataTransformation(const Params& params); - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; - bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override; + ConvolutionBackpropDataTransformation(const Params& params = Params()); + bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override; bool canBeTransformed(const TransformationContext& context, std::shared_ptr op) const override; - bool isQuantized(std::shared_ptr layer) const noexcept override; + bool isQuantized(const std::shared_ptr& layer) const noexcept override; + static bool isQuantizedStatic(const std::shared_ptr& layer) noexcept; }; } // namespace low_precision diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/create_attribute.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/create_attribute.hpp new file mode 100644 index 00000000000000..819cd11b430306 --- /dev/null +++ b/inference-engine/src/low_precision_transformations/include/low_precision/create_attribute.hpp @@ -0,0 +1,61 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include + +#include +#include +#include +#include "low_precision/lpt_visibility.hpp" +#include "low_precision/base_matcher_pass.hpp" +#include "low_precision/lpt_itt.hpp" + +namespace ngraph { +namespace pass { +namespace low_precision { + +template +class CreateAttribute; + +} // namespace low_precision +} // namespace pass +} // namespace ngraph + +enum class AttributeSource { + Node, + OutputPort +}; + +template +class ngraph::pass::low_precision::CreateAttribute : public ngraph::pass::low_precision::BaseMatcherPass { +public: + CreateAttribute(const AttributeSource source = AttributeSource::Node) { + assert((source == AttributeSource::Node) || (source == AttributeSource::OutputPort)); + auto operation = std::is_same::value ? 
+ pattern::any_input() : + pattern::wrap_type(); + + ngraph::graph_rewrite_callback callback = [&](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + { + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::LPT_LT, "CreateAttribute"); + const auto attribute = ngraph::VariantWrapper::create(op, params); + if (attribute == nullptr) { + return false; + } + } + return true; + }; + + auto matcher = std::make_shared(operation, "CreateAttribute"); + this->register_matcher(matcher, callback); + } +}; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/create_precisions_dependent_attribute.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/create_precisions_dependent_attribute.hpp new file mode 100644 index 00000000000000..4104d646e23b0f --- /dev/null +++ b/inference-engine/src/low_precision_transformations/include/low_precision/create_precisions_dependent_attribute.hpp @@ -0,0 +1,70 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include +#include +#include + +#include +#include +#include +#include "rt_info/precision_preserved_attribute.hpp" +#include "network_helper.hpp" +#include "lpt_itt.hpp" + +namespace ngraph { +namespace pass { +namespace low_precision { + +template +class CreatePrecisionsDependentAttribute; + +} // namespace low_precision +} // namespace pass +} // namespace ngraph + +template +class ngraph::pass::low_precision::CreatePrecisionsDependentAttribute : public ngraph::pass::MatcherPass { +public: + CreatePrecisionsDependentAttribute() { + auto operation = pattern::wrap_type(); + + ngraph::graph_rewrite_callback callback = [&](pattern::Matcher& m) { + auto node = m.get_match_root(); + if (transformation_callback(node)) { + return false; + } + + { + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::LPT_LT, "CreatePrecisionsDependentAttribute"); + auto &rt = node->get_rt_info(); + + const auto precisionPreservedAttribute = std::make_shared>( + std::make_shared(false)); + rt[ngraph::VariantWrapper::type_info.name] = precisionPreservedAttribute; + const auto &targetSharedValue = precisionPreservedAttribute->get()->sharedValue; + + const auto attribute = std::make_shared>>( + std::make_shared()); + rt[ngraph::VariantWrapper>::type_info.name] = attribute; + + ngraph::pass::low_precision::NetworkHelper::reassign( + targetSharedValue, + { + std::dynamic_pointer_cast(attribute->get()), + std::dynamic_pointer_cast(precisionPreservedAttribute->get()) + }); + } + return true; + }; + + auto matcher = std::make_shared(operation, "CreatePrecisionsDependentAttribute"); + this->register_matcher(matcher, callback); + } +}; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/depth_to_space.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/depth_to_space.hpp index 0fc9d6446897d1..b02ead7321b622 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/depth_to_space.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/depth_to_space.hpp @@ -10,12 +10,11 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API DepthToSpaceTransformation : public TransparentBaseTransformation { +class LP_TRANSFORMATIONS_API DepthToSpaceTransformation : public TransparentBaseTransformation { public: - DepthToSpaceTransformation(const Params& params) : 
TransparentBaseTransformation(params) {} - ~DepthToSpaceTransformation() override {} - bool transform(TransformationContext &context, ngraph::pattern::Matcher &m) const override; - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; + NGRAPH_RTTI_DECLARATION; + DepthToSpaceTransformation(const Params& params = Params()); + bool transform(TransformationContext &context, ngraph::pattern::Matcher &m) override; bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; bool canBeTransformed(const TransformationContext& context, std::shared_ptr layer) const override; }; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/eltwise_base_transformation.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/eltwise_base_transformation.hpp index 67cc0f9904136d..c648d6efadc4b0 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/eltwise_base_transformation.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/eltwise_base_transformation.hpp @@ -12,7 +12,7 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API EltwiseBaseTransformation : public LayerTransformation { +class LP_TRANSFORMATIONS_API EltwiseBaseTransformation : public LayerTransformation { public: EltwiseBaseTransformation(const Params& params) : LayerTransformation(params) {} bool canBeTransformed(const TransformationContext& context, std::shared_ptr layer) const override; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/fake_quantize.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/fake_quantize.hpp index ac75f406a2be98..15975782ef07f5 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/fake_quantize.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/fake_quantize.hpp @@ -13,17 +13,20 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API FakeQuantizeTransformation : public LayerTransformation { +class LP_TRANSFORMATIONS_API FakeQuantizeTransformation : public LayerTransformation { public: - FakeQuantizeTransformation(const Params& params) : LayerTransformation(params) {} - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; - bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override; + NGRAPH_RTTI_DECLARATION; + FakeQuantizeTransformation(const Params& params = Params()); + bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override; bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; static bool checkElementwise(const std::shared_ptr& eltwise); private: - std::shared_ptr fuseElementwise(TransformationContext& context, const std::shared_ptr& fakeQuantize) const; + std::shared_ptr fuseElementwise( + TransformationContext& context, + MatcherPass* matcherPass, + const std::shared_ptr& fakeQuantize) const; }; } // namespace low_precision diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/fake_quantize_decomposition.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/fake_quantize_decomposition.hpp index 0c6da56592e334..45948ca32b72ad 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/fake_quantize_decomposition.hpp +++ 
b/inference-engine/src/low_precision_transformations/include/low_precision/fake_quantize_decomposition.hpp @@ -13,11 +13,11 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API FakeQuantizeDecompositionTransformation : public LayerTransformation { +class LP_TRANSFORMATIONS_API FakeQuantizeDecompositionTransformation : public LayerTransformation { public: - FakeQuantizeDecompositionTransformation(const Params& params) : LayerTransformation(params) {} - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; - bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override; + NGRAPH_RTTI_DECLARATION; + FakeQuantizeDecompositionTransformation(const Params& params = Params()); + bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override; bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; }; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/fold_convert.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/fold_convert.hpp index d41706f920579b..4390b7290e2f60 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/fold_convert.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/fold_convert.hpp @@ -12,12 +12,11 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API FoldConvertTransformation : public LayerTransformation { +class LP_TRANSFORMATIONS_API FoldConvertTransformation : public LayerTransformation { public: - FoldConvertTransformation(const Params& params) : LayerTransformation(params) {} - ~FoldConvertTransformation() override {} - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; - bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override; + NGRAPH_RTTI_DECLARATION; + FoldConvertTransformation(const Params& params = Params()); + bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override; bool canBeTransformed(const TransformationContext& context, std::shared_ptr layer) const override; bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; }; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/fold_fake_quantize.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/fold_fake_quantize.hpp new file mode 100644 index 00000000000000..7f2862fc942288 --- /dev/null +++ b/inference-engine/src/low_precision_transformations/include/low_precision/fold_fake_quantize.hpp @@ -0,0 +1,25 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include "low_precision/layer_transformation.hpp" + +namespace ngraph { +namespace pass { +namespace low_precision { + +class LP_TRANSFORMATIONS_API FoldFakeQuantizeTransformation : public LayerTransformation { +public: + NGRAPH_RTTI_DECLARATION; + FoldFakeQuantizeTransformation(const Params& params = Params()); + bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override; + bool canBeTransformed(const TransformationContext& context, std::shared_ptr layer) const override; + bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; +}; + +} // namespace low_precision +} // namespace pass +} // namespace ngraph diff --git 
a/inference-engine/src/low_precision_transformations/include/low_precision/fuse_convert.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/fuse_convert.hpp index e8f2e864e46e29..4ccc59808ad129 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/fuse_convert.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/fuse_convert.hpp @@ -12,12 +12,11 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API FuseConvertTransformation : public LayerTransformation { +class LP_TRANSFORMATIONS_API FuseConvertTransformation : public LayerTransformation { public: - FuseConvertTransformation(const Params& params) : LayerTransformation(params) {} - ~FuseConvertTransformation() override {} - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; - bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override; + NGRAPH_RTTI_DECLARATION; + FuseConvertTransformation(const Params& params = Params()); + bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override; bool canBeTransformed(const TransformationContext& context, std::shared_ptr layer) const override; bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; }; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/fuse_fake_quantize.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/fuse_fake_quantize.hpp index 8d46c68f3d77d1..b752df52a494cd 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/fuse_fake_quantize.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/fuse_fake_quantize.hpp @@ -12,12 +12,11 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API FuseFakeQuantizeTransformation : public LayerTransformation { +class LP_TRANSFORMATIONS_API FuseFakeQuantizeTransformation : public LayerTransformation { public: - FuseFakeQuantizeTransformation(const Params& params) : LayerTransformation(params) {} - ~FuseFakeQuantizeTransformation() override {} - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; - bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override; + NGRAPH_RTTI_DECLARATION; + FuseFakeQuantizeTransformation(const Params& params); + bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override; bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; private: diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/fuse_multiply_to_fake_quantize.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/fuse_multiply_to_fake_quantize.hpp index dea0fa340551b3..d43aa87441eb29 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/fuse_multiply_to_fake_quantize.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/fuse_multiply_to_fake_quantize.hpp @@ -12,12 +12,11 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API FuseMultiplyToFakeQuantizeTransformation : public LayerTransformation { +class LP_TRANSFORMATIONS_API FuseMultiplyToFakeQuantizeTransformation : public LayerTransformation { public: - FuseMultiplyToFakeQuantizeTransformation(const Params& params) : LayerTransformation(params) {} - 
~FuseMultiplyToFakeQuantizeTransformation() override {} - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; - bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override; + NGRAPH_RTTI_DECLARATION; + FuseMultiplyToFakeQuantizeTransformation(const Params& params = Params()); + bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override; bool canBeTransformed(const TransformationContext& context, std::shared_ptr layer) const override; bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; }; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/fuse_subtract_to_fake_quantize.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/fuse_subtract_to_fake_quantize.hpp index 2c67aebfcf186a..80d6f22f785eff 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/fuse_subtract_to_fake_quantize.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/fuse_subtract_to_fake_quantize.hpp @@ -12,12 +12,11 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API FuseSubtractToFakeQuantizeTransformation : public LayerTransformation { +class LP_TRANSFORMATIONS_API FuseSubtractToFakeQuantizeTransformation : public LayerTransformation { public: - FuseSubtractToFakeQuantizeTransformation(const Params& params) : LayerTransformation(params) {} - ~FuseSubtractToFakeQuantizeTransformation() override {} - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; - bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override; + NGRAPH_RTTI_DECLARATION; + FuseSubtractToFakeQuantizeTransformation(const Params& params = Params()); + bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override; bool canBeTransformed(const TransformationContext& context, std::shared_ptr layer) const override; bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; }; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/group_convolution.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/group_convolution.hpp index 0372f0173d9d87..d53c37b8df93b8 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/group_convolution.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/group_convolution.hpp @@ -11,12 +11,13 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API GroupConvolutionTransformation : public ConvolutionTransformation { +class LP_TRANSFORMATIONS_API GroupConvolutionTransformation : public ConvolutionTransformation { public: - GroupConvolutionTransformation(const Params& params); - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; - bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override; - bool isQuantized(std::shared_ptr layer) const noexcept override; + NGRAPH_RTTI_DECLARATION; + GroupConvolutionTransformation(const Params& params = Params()); + bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override; + bool isQuantized(const std::shared_ptr& layer) const noexcept override; + static bool isQuantizedStatic(const std::shared_ptr& layer) noexcept; }; } // namespace low_precision diff --git 
a/inference-engine/src/low_precision_transformations/include/low_precision/ilayer_transformations_manager.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/ilayer_transformations_manager.hpp deleted file mode 100644 index 389584b7448203..00000000000000 --- a/inference-engine/src/low_precision_transformations/include/low_precision/ilayer_transformations_manager.hpp +++ /dev/null @@ -1,24 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include -#include -#include "transformations_visibility.hpp" - -namespace ngraph { -namespace pass { - -/** - * @brief low precision transformation component interface. - */ -class TRANSFORMATIONS_API ILayerTransformationsManager { -public: - virtual bool isQuantized(const std::shared_ptr& layer) const noexcept = 0; - virtual bool isPrecisionPreserved(const std::shared_ptr& layer) const noexcept = 0; -}; - -} // namespace pass -} // namespace ngraph diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/interpolate.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/interpolate.hpp index 184d1c159fe615..9d454e59542dd8 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/interpolate.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/interpolate.hpp @@ -10,12 +10,11 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API InterpolateTransformation : public LayerTransformation { +class LP_TRANSFORMATIONS_API InterpolateTransformation : public LayerTransformation { public: - InterpolateTransformation(const Params& params) : LayerTransformation(params) {} - ~InterpolateTransformation() override {} - bool transform(TransformationContext &context, ngraph::pattern::Matcher &m) const override; - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; + NGRAPH_RTTI_DECLARATION; + InterpolateTransformation(const Params& params = Params()); + bool transform(TransformationContext &context, ngraph::pattern::Matcher &m) override; bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; bool canBeTransformed(const TransformationContext& context, std::shared_ptr layer) const override; }; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/iparams_manager.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/iparams_manager.hpp deleted file mode 100644 index 2d45179a600b9a..00000000000000 --- a/inference-engine/src/low_precision_transformations/include/low_precision/iparams_manager.hpp +++ /dev/null @@ -1,24 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include -#include -#include - -namespace ngraph { -namespace pass { - -/** - * @brief low precision transformation component interface. 
- */ -class TRANSFORMATIONS_API IParamsManager {public: - // TODO FIXME: it is not correct to have a string as a key here, try to use NodeTypeInfo - virtual std::vector getPrecisionsOnActivations(const Node& op) const noexcept = 0; -}; - -} // namespace pass -} // namespace ngraph diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/layer_transformation.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/layer_transformation.hpp index 06a37ab8b22015..40807928305e85 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/layer_transformation.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/layer_transformation.hpp @@ -13,8 +13,6 @@ #include #include -#include "iparams_manager.hpp" -#include "ilayer_transformations_manager.hpp" #include "transformation_context.hpp" #include "quantization_details.hpp" #include "low_precision/common/ie_lpt_exception.hpp" @@ -41,7 +39,7 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API DataPrecision { +class LP_TRANSFORMATIONS_API DataPrecision { public: DataPrecision() : precision(element::undefined), min(0.f), max(0.f), hasZeroPoint(false) {} @@ -108,6 +106,17 @@ class TRANSFORMATIONS_API DataPrecision { } } + // Return the maximum quantized value for the given number of quantization levels. + static float getMaxValue(const size_t maxLevelsForPrecision) { + if (maxLevelsForPrecision == 255ul) { + return 254.f; + } else if (maxLevelsForPrecision == 256ul) { + return 255.f; + } else { + THROW_TRANSFORMATION_EXCEPTION << "unexpected quantization level " << maxLevelsForPrecision; + } + } + static bool hasNegativeValues(const std::vector& values) { for (const float value : values) { if (value < 0.0) { @@ -148,92 +157,28 @@ inline std::ostream &operator << (std::ostream &os, const DataPrecision& value) } // Base class for all LP transformations, holds some common data structures -class TRANSFORMATIONS_API LayerTransformation { +class LP_TRANSFORMATIONS_API LayerTransformation : public ngraph::pass::MatcherPass { public: - enum QuantizedTensorAlignment { - None, - UpdateLevel - }; - class Params { public: Params( - const bool updatePrecisions = true, - const QuantizedTensorAlignment quantizedTensorAlignmentOnActivations = QuantizedTensorAlignment::UpdateLevel, - const QuantizedTensorAlignment quantizedTensorAlignmentOnWeights = QuantizedTensorAlignment::None, - bool supportAsymmetricQuantization = false, - std::vector precisionsOnActivations = { element::u8, element::i8 }, - std::vector precisionsOnWeights = { element::i8 }, - element::Type deqPrecision = element::f32, - bool support3DTensorOnActivations = true, - bool deconvolutionSpecificChannelsRatio = false) : - updatePrecisions(updatePrecisions), - quantizedTensorAlignmentOnActivations(quantizedTensorAlignmentOnActivations), - quantizedTensorAlignmentOnWeights(quantizedTensorAlignmentOnWeights), - supportAsymmetricQuantization(supportAsymmetricQuantization), - precisionsOnActivations(precisionsOnActivations), - precisionsOnWeights(precisionsOnWeights), - deqPrecision(deqPrecision), - support3DTensorOnActivations(support3DTensorOnActivations), - deconvolutionSpecificChannelsRatio(deconvolutionSpecificChannelsRatio) { - if (precisionsOnActivations.size() == 0ul) { - THROW_TRANSFORMATION_EXCEPTION << "precisions on activations are not specisifed"; - } - - if (precisionsOnWeights.size() == 0ul) { - THROW_TRANSFORMATION_EXCEPTION <<
"precisions on weights are not specisifed"; - } - } + const bool updatePrecisions = true, + element::Type deqPrecision = element::f32) : + updatePrecisions(updatePrecisions), + deqPrecision(deqPrecision) {} Params& setUpdatePrecisions(const bool updatePrecisions) { this->updatePrecisions = updatePrecisions; return *this; } - Params& setQuantizedTensorAlignmentOnActivations(const QuantizedTensorAlignment quantizedTensorAlignmentOnActivations) { - this->quantizedTensorAlignmentOnActivations = quantizedTensorAlignmentOnActivations; - return *this; - } - - Params& setQuantizedTensorAlignmentOnWeights(const QuantizedTensorAlignment quantizedTensorAlignmentOnWeights) { - this->quantizedTensorAlignmentOnWeights = quantizedTensorAlignmentOnWeights; - return *this; - } - - Params& setSupportAsymmetricQuantization(const bool supportAsymmetricQuantization) { - this->supportAsymmetricQuantization = supportAsymmetricQuantization; - return *this; - } - - Params& setPrecisionsOnActivations(const std::vector& precisionsOnActivations) { - this->precisionsOnActivations = precisionsOnActivations; - return *this; - } - - Params& setPrecisionsOnWeights(const std::vector& precisionsOnWeights) { - this->precisionsOnWeights = precisionsOnWeights; - return *this; - } - - Params& setSupport3DTensorOnActivations(const bool support3DTensorOnActivations) { - this->support3DTensorOnActivations = support3DTensorOnActivations; - return *this; - } - - Params& setDeconvolutionSpecificChannelsRatio(const bool deconvolutionSpecificChannelsRatio) { - this->deconvolutionSpecificChannelsRatio = deconvolutionSpecificChannelsRatio; + Params& setDeqPrecision(const element::Type& deqPrecision) { + this->deqPrecision = deqPrecision; return *this; } bool updatePrecisions; - QuantizedTensorAlignment quantizedTensorAlignmentOnActivations; - QuantizedTensorAlignment quantizedTensorAlignmentOnWeights; - bool supportAsymmetricQuantization; - std::vector precisionsOnActivations; - std::vector precisionsOnWeights; element::Type deqPrecision; - bool support3DTensorOnActivations; - bool deconvolutionSpecificChannelsRatio; }; class PrecisionDetails { @@ -243,55 +188,49 @@ class TRANSFORMATIONS_API LayerTransformation { hasNegativeOutput(hasNegativeOutput), hasZeroPoint(hasZeroPoint) {} - const element::Type precision; - const bool hasNegativeOutput; - const bool hasZeroPoint; + element::Type precision; + bool hasNegativeOutput; + bool hasZeroPoint; }; LayerTransformation(const Params& params); virtual ~LayerTransformation() = default; - virtual void registerMatcherIn(ngraph::pass::GraphRewrite& pass, TransformationContext& context) const = 0; - virtual bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const = 0; + virtual bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) = 0; - void setParamsManager(IParamsManager* paramsManager) noexcept; - void setLayerTransformationsManager(ILayerTransformationsManager* layerTransformationsManager) noexcept; + void setContext(TransformationContext* context) noexcept; void setUpdatePrecisions(const bool updatePrecisions); - void setQuantizedTensorAlignmentOnActivations(const QuantizedTensorAlignment quantizedTensorAlignmentOnActivations); - void setQuantizedTensorAlignmentOnWeights(const QuantizedTensorAlignment quantizedTensorAlignmentOnWeights); - - void setQuantizationIntervalAsymmetryThreshold(const float value); - void setZeroThreshold(const float value); - void setMinQuantizationLevels(const size_t levels); - - const std::vector& 
getPrecisionsOnActivations() const; - const std::vector& getPrecisionsOnWeights() const; virtual bool canBeTransformed(const TransformationContext& context, std::shared_ptr layer) const; - - bool canSubtractBeHandled(const std::shared_ptr& op, const size_t parentIndex = 0ul) const; + static bool canBeTransformedStatic(const std::shared_ptr& layer); bool canSubtractBeHandled(const std::shared_ptr& op, const FakeQuantizeDequantization& dequantization) const; - PrecisionDetails getPrecisionDetails(const QuantizationDetails& quantizationDetails) const; + // Get precision based on FakeQuantize operation. + // Undefined value is expected. In this case the accuracy has to be defined by the calling code. + // TODO: LPT: INT8 specific here + static PrecisionDetails getPrecisionDetails( + const size_t quantizationLevels, + const std::vector& outputLowValues, + const std::vector& outputHighValues); + static PrecisionDetails getPrecisionDetails(const QuantizationDetails& quantizationDetails); + + static bool isAsymmetricQuantization(const std::shared_ptr& node); // return true if operation can be quantized and false otherwise // for example: if convolution operation weights are not quantized, then isQuantize returns false and true otherwise // note: dequantization operations on activations are absent during method execution - virtual bool isQuantized(std::shared_ptr layer) const noexcept; + virtual bool isQuantized(const std::shared_ptr& layer) const noexcept; // return true if operation can be preserved for precision // note: dequantization operations on activations are absent during method execution virtual bool isPrecisionPreserved(std::shared_ptr layer) const noexcept = 0; - DataPrecision getDataPrecision( - std::shared_ptr layer, + // weights specific + static DataPrecision getDataPrecision( + const std::shared_ptr& layer, const QuantizationDetails& quantizationDetails, - const bool onWeights) const; - - void fillAvailablePrecisions(std::shared_ptr layer, std::vector& availablePrecisions) const; - - std::vector> getChildrenRecursivelyExceptPrecisionPreserved(const std::shared_ptr& op) const noexcept; + const std::vector& precisions); protected: #ifdef LPT_PRINT_DEQUANTIZATION_INFO @@ -303,24 +242,10 @@ class TRANSFORMATIONS_API LayerTransformation { #endif bool updatePrecisions; - QuantizedTensorAlignment quantizedTensorAlignmentOnActivations; - QuantizedTensorAlignment quantizedTensorAlignmentOnWeights; - bool supportAsymmetricQuantization; - std::vector precisionsOnActivations; - std::vector precisionsOnWeights; element::Type deqPrecision; - bool support3DTensorOnActivations; - bool deconvolutionSpecificChannelsRatio; - - // absolute value, used to determine quantization interval asymmetry - float quantizationIntervalAsymmetryThreshold; - // absolute value, used to determine zero - float zeroThreshold; - size_t minQuantizationLevels; static const char originalLayerPostfix[]; - IParamsManager* paramsManager; - ILayerTransformationsManager* layerTransformationsManager; + TransformationContext* context; protected: std::shared_ptr moveDequantizationAfter( @@ -340,7 +265,7 @@ class TRANSFORMATIONS_API LayerTransformation { std::shared_ptr lastNode, std::string originalName) const; - void addPattern(ngraph::pass::GraphRewrite& pass, TransformationContext& context, std::shared_ptr patternRoot) const; + void addPattern(ngraph::pass::GraphRewrite& pass, TransformationContext& context, std::shared_ptr patternRoot); //TODO: replace with canBeTransformed when quantization by special dimension is supported 
for all transformations bool canBeTransformedSpatialDimension(const TransformationContext& context, std::shared_ptr layer) const; @@ -358,38 +283,6 @@ class TRANSFORMATIONS_API LayerTransformation { } }; -inline std::ostream &operator << (std::ostream &os, const LayerTransformation::QuantizedTensorAlignment& value) { - switch (value) { - case LayerTransformation::QuantizedTensorAlignment::None: { - os << "None"; - break; - } - case LayerTransformation::QuantizedTensorAlignment::UpdateLevel: { - os << "UpdateLevel"; - break; - } - default: { - os << static_cast(value); - break; - } - } - return os; -} - -inline std::ostream &operator << (std::ostream &os, const std::vector& values) { - os << "{"; - for (size_t i = 0; i < values.size(); ++i) { - const element::Type& value = values[i]; - if (i > 0) { - os << value; - } else { - os << ", " << value; - } - } - os << "}"; - return os; -} - typedef std::shared_ptr LayerTransformationPtr; } // namespace low_precision diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/low_precision.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/low_precision.hpp new file mode 100644 index 00000000000000..454ebebfda338c --- /dev/null +++ b/inference-engine/src/low_precision_transformations/include/low_precision/low_precision.hpp @@ -0,0 +1,74 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +// one place to include all Low Precision Transformations from ngraph::pass::low_precision +#include +#include +#include +#include + +#include +#include +#include +#include + + +#include +#include +#include +#include "low_precision/layer_transformation.hpp" +#include "low_precision/markup_precisions.hpp" + +namespace ngraph { +namespace pass { +namespace low_precision { + +class LP_TRANSFORMATIONS_API TypeRelaxedReplacer; +class LP_TRANSFORMATIONS_API MarkupOptimizations; +class LP_TRANSFORMATIONS_API LowPrecision; + +} // namespace low_precision +} // namespace pass +} // namespace ngraph + +class LP_TRANSFORMATIONS_API ngraph::pass::low_precision::MarkupOptimizations : public ngraph::pass::FunctionPass { +public: + NGRAPH_RTTI_DECLARATION; + MarkupOptimizations( + const std::vector& precisionRestrictions, + const std::vector& quantizationRestrictions); + bool run_on_function(std::shared_ptr f) override; +private: + const std::vector& precisionRestrictions; + const std::vector& quantizationRestrictions; +}; + +class LP_TRANSFORMATIONS_API ngraph::pass::low_precision::TypeRelaxedReplacer : public ngraph::pass::GraphRewrite { +public: + NGRAPH_RTTI_DECLARATION; + TypeRelaxedReplacer(); +}; + +class LP_TRANSFORMATIONS_API ngraph::pass::low_precision::LowPrecision : public ngraph::pass::FunctionPass { +public: + NGRAPH_RTTI_DECLARATION; + LowPrecision( + const std::vector& precisionRestrictions = {}, + const std::vector& quantizationRestrictions = {}, + const LayerTransformation::Params = LayerTransformation::Params()); + bool run_on_function(std::shared_ptr f) override; + + static bool isFunctionQuantized(const std::shared_ptr& function); + +protected: + std::vector precisionRestrictions; + std::vector quantizationRestrictions; + // remove + LayerTransformation::Params params; +}; diff --git a/inference-engine/src/low_precision_transformations/src/lpt_itt.h b/inference-engine/src/low_precision_transformations/include/low_precision/lpt_itt.hpp similarity index 95% rename from 
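The new low_precision.hpp above becomes the single entry point for the pipeline: MarkupOptimizations runs the markup passes, TypeRelaxedReplacer (by its name, a GraphRewrite that swaps in TypeRelaxed operation variants) prepares the graph, and LowPrecision drives everything. A minimal usage sketch; the driver function is illustrative, and both restriction lists default to empty:

#include <ngraph/pass/manager.hpp>
#include <low_precision/low_precision.hpp>

void applyLowPrecision(const std::shared_ptr<ngraph::Function>& f) {
    using namespace ngraph::pass::low_precision;
    // Cheap screening first: skip functions without FakeQuantize-based quantization.
    if (!LowPrecision::isFunctionQuantized(f)) {
        return;
    }
    ngraph::pass::Manager manager;
    manager.register_pass<LowPrecision>();  // default restriction lists and default Params
    manager.run_passes(f);
}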
inference-engine/src/low_precision_transformations/src/lpt_itt.h rename to inference-engine/src/low_precision_transformations/include/low_precision/lpt_itt.hpp index 5b3f1b524bcb9d..081c5b8d39e79e 100644 --- a/inference-engine/src/low_precision_transformations/src/lpt_itt.h +++ b/inference-engine/src/low_precision_transformations/include/low_precision/lpt_itt.hpp @@ -4,11 +4,12 @@ /** * @brief Defines openvino domains for tracing - * @file lpt_itt.h + * @file lpt_itt.hpp */ #pragma once + #include namespace ngraph { diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/lpt_visibility.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/lpt_visibility.hpp new file mode 100644 index 00000000000000..3867192208f652 --- /dev/null +++ b/inference-engine/src/low_precision_transformations/include/low_precision/lpt_visibility.hpp @@ -0,0 +1,18 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "ngraph/visibility.hpp" + +/** + * @file lpt_visibility.hpp + * @brief Defines visibility settings for Inference Engine LP Transformations library + */ + +#ifdef inference_engine_lp_transformations_EXPORTS +#define LP_TRANSFORMATIONS_API NGRAPH_HELPER_DLL_EXPORT +#else +#define LP_TRANSFORMATIONS_API NGRAPH_HELPER_DLL_IMPORT +#endif diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/main.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/main.hpp deleted file mode 100644 index 79ce4f06ace999..00000000000000 --- a/inference-engine/src/low_precision_transformations/include/low_precision/main.hpp +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include - -#include - -#include - -#include -#include -#include - -using namespace std; - - -namespace ngraph { -namespace pass { - -class TRANSFORMATIONS_API LowPrecisionTransformations: public ngraph::pass::GraphRewrite, IParamsManager, ILayerTransformationsManager { -public: - bool run_on_function(std::shared_ptr f) override; - - // IParamsManager interface implementation - std::vector getPrecisionsOnActivations(const NodeTypeInfo& layerName) const noexcept override; - - // ILayerTransformationsManager interface implementation - bool isQuantized(std::shared_ptr layer) const noexcept override; - bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; -}; - -}// namespace pass -}// namespace ngraph diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/markup_avg_pool_precision_preserved.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/markup_avg_pool_precision_preserved.hpp new file mode 100644 index 00000000000000..e3a517bff307a2 --- /dev/null +++ b/inference-engine/src/low_precision_transformations/include/low_precision/markup_avg_pool_precision_preserved.hpp @@ -0,0 +1,25 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include + +namespace ngraph { +namespace pass { +namespace low_precision { + +class LP_TRANSFORMATIONS_API MarkupAvgPoolPrecisionPreserved; + +} // namespace low_precision +} // namespace pass +} // namespace ngraph + +class ngraph::pass::low_precision::MarkupAvgPoolPrecisionPreserved : public ngraph::pass::FunctionPass { +public: + NGRAPH_RTTI_DECLARATION; + bool run_on_function(std::shared_ptr f) override; +}; diff 
--git a/inference-engine/src/low_precision_transformations/include/low_precision/markup_can_be_quantized.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/markup_can_be_quantized.hpp new file mode 100644 index 00000000000000..82f66857337c3a --- /dev/null +++ b/inference-engine/src/low_precision_transformations/include/low_precision/markup_can_be_quantized.hpp @@ -0,0 +1,25 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include "low_precision/lpt_visibility.hpp" + +namespace ngraph { +namespace pass { +namespace low_precision { + +class LP_TRANSFORMATIONS_API MarkupCanBeQuantized; + +} // namespace low_precision +} // namespace pass +} // namespace ngraph + +class ngraph::pass::low_precision::MarkupCanBeQuantized : public ngraph::pass::FunctionPass { +public: + NGRAPH_RTTI_DECLARATION; + bool run_on_function(std::shared_ptr f) override; +}; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/markup_per_tensor_quantization.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/markup_per_tensor_quantization.hpp new file mode 100644 index 00000000000000..5aa9f76b1fd23f --- /dev/null +++ b/inference-engine/src/low_precision_transformations/include/low_precision/markup_per_tensor_quantization.hpp @@ -0,0 +1,44 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include + +#include +#include "common/operation_per_tensor_quantization_restriction.hpp" +#include "low_precision/lpt_visibility.hpp" + +namespace ngraph { +namespace pass { +namespace low_precision { + +class LP_TRANSFORMATIONS_API MarkupPerTensorQuantization; + +} // namespace low_precision +} // namespace pass +} // namespace ngraph + +class ngraph::pass::low_precision::MarkupPerTensorQuantization : public ngraph::pass::FunctionPass { +public: + class PerTensorQuantization { + public: + explicit PerTensorQuantization(const bool versionIsRequired) : versionIsRequired(versionIsRequired) {} + void add(const uint64_t version, const std::vector& ports) { + portsByVersion.emplace(version, ports); + } + + bool versionIsRequired; + std::unordered_map> portsByVersion; + }; + + NGRAPH_RTTI_DECLARATION; + explicit MarkupPerTensorQuantization(const std::vector& restrictions = {}); + bool run_on_function(std::shared_ptr f) override; + +private: + std::unordered_map restrictionsByOperation; +}; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/markup_precisions.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/markup_precisions.hpp new file mode 100644 index 00000000000000..87c7cc85a40824 --- /dev/null +++ b/inference-engine/src/low_precision_transformations/include/low_precision/markup_precisions.hpp @@ -0,0 +1,47 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include + +#include +#include "low_precision/lpt_visibility.hpp" +#include "low_precision/common/operation_precision_restriction.hpp" + +namespace ngraph { +namespace pass { +namespace low_precision { + +class LP_TRANSFORMATIONS_API MarkupPrecisions; + +} // namespace low_precision +} // namespace pass +} // namespace ngraph + +// Transformation is used to add customization options at runtime +class ngraph::pass::low_precision::MarkupPrecisions : public ngraph::pass::FunctionPass { +public: + class Restriction {
+ public: + explicit Restriction(const bool versionIsRequired) : versionIsRequired(versionIsRequired) {} + void add(const uint64_t version, const std::vector>>& precisions) { + precisionsByVersion.emplace(version, precisions); + } + + bool versionIsRequired; + std::unordered_map>>> precisionsByVersion; + }; + + NGRAPH_RTTI_DECLARATION; + explicit MarkupPrecisions(const std::vector& restrictions = {}); + bool run_on_function(std::shared_ptr f) override; + +private: + static bool isPrecisionPreserved(const std::shared_ptr& node); + static bool isSupported(const std::shared_ptr& node); + std::unordered_map restrictionsByOperation; +}; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/mat_mul.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/mat_mul.hpp index 332d28b934b44e..067f82ea59b28b 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/mat_mul.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/mat_mul.hpp @@ -11,14 +11,14 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API MatMulTransformation : public LayerTransformation { +class LP_TRANSFORMATIONS_API MatMulTransformation : public LayerTransformation { public: - MatMulTransformation(const Params& params) : LayerTransformation(params) {} - ~MatMulTransformation() override {} - bool transform(TransformationContext &context, ngraph::pattern::Matcher &m) const override; - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; + NGRAPH_RTTI_DECLARATION; + MatMulTransformation(const Params& params = Params()); + bool transform(TransformationContext &context, ngraph::pattern::Matcher &m) override; bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; bool canBeTransformed(const TransformationContext& context, std::shared_ptr layer) const override; + static bool is3DTensorOnActivations(const std::shared_ptr& node); }; } // namespace low_precision diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/max_pool.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/max_pool.hpp index 2cf1d54eda7f44..ca2b8a08272817 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/max_pool.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/max_pool.hpp @@ -12,12 +12,12 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API MaxPoolTransformation : public LayerTransformation { +class LP_TRANSFORMATIONS_API MaxPoolTransformation : public LayerTransformation { public: - MaxPoolTransformation(const Params& params); - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; + NGRAPH_RTTI_DECLARATION; + MaxPoolTransformation(const Params& params = Params()); bool canBeTransformed(const TransformationContext& context, std::shared_ptr op) const override; - bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override; + bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override; bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; }; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/multiply.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/multiply.hpp index 30f1cff5444d37..da226fe263b757 100644 --- 
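For reference, the add signatures in the two markup passes above lost their template arguments to formatting; judging by the member declarations they presumably take std::vector<std::pair<size_t, std::vector<ngraph::element::Type>>> (precisions per input port) and std::vector<size_t> (port indices). A hypothetical sketch of filling a Restriction entry under that assumption:

using namespace ngraph;
// versionIsRequired == true: the restriction applies only to the stated opset version
pass::low_precision::MarkupPrecisions::Restriction convolutionRestriction(true);
convolutionRestriction.add(1ul, {
    { 0, { element::u8 } },  // input port 0: activations
    { 1, { element::i8 } }   // input port 1: weights
});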
a/inference-engine/src/low_precision_transformations/include/low_precision/multiply.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/multiply.hpp @@ -11,12 +11,11 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API MultiplyTransformation : public EltwiseBaseTransformation { +class LP_TRANSFORMATIONS_API MultiplyTransformation : public EltwiseBaseTransformation { public: - MultiplyTransformation(const Params& params) : EltwiseBaseTransformation(params) {} - ~MultiplyTransformation() override {} - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; - bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override; + NGRAPH_RTTI_DECLARATION; + MultiplyTransformation(const Params& params = Params()); + bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override; }; } // namespace low_precision diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/multiply_to_group_convolution.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/multiply_to_group_convolution.hpp index d4a575f4d9a9de..5e6bd900d8ea9e 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/multiply_to_group_convolution.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/multiply_to_group_convolution.hpp @@ -7,24 +7,30 @@ #include #include #include "low_precision/layer_transformation.hpp" +#include "common/operation_precision_restriction.hpp" namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API MultiplyToGroupConvolutionTransformation : public LayerTransformation { +class LP_TRANSFORMATIONS_API MultiplyToGroupConvolutionTransformation : public LayerTransformation { public: - MultiplyToGroupConvolutionTransformation(const Params& params) : LayerTransformation(params), groupSize(1ul) {} + NGRAPH_RTTI_DECLARATION; + MultiplyToGroupConvolutionTransformation( + const Params& params = Params(), + const OperationPrecisionRestriction::PrecisionsByPort& restrictions = {}); ~MultiplyToGroupConvolutionTransformation() override {} - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; - bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override; + bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override; bool canBeTransformed(const TransformationContext& context, std::shared_ptr layer) const override; bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; - bool isQuantized(std::shared_ptr layer) const noexcept override; + bool isQuantized(const std::shared_ptr& layer) const noexcept override; + static bool canBeTransformedToGroupConvolution(const std::shared_ptr& layer) noexcept; + static bool isDynamicOrScalar(const std::shared_ptr& node); void setGroupSize(const size_t groupSize); size_t getGroupSize() const; private: + OperationPrecisionRestriction::PrecisionsByPort restrictions; size_t groupSize; }; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/mvn.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/mvn.hpp index 37244a3aa74c0b..42ddd6f0b620a1 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/mvn.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/mvn.hpp @@ -10,11 +10,11 @@ 
namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API MVNTransformation : public LayerTransformation { +class LP_TRANSFORMATIONS_API MVNTransformation : public LayerTransformation { public: - MVNTransformation(const Params& params) : LayerTransformation(params) {} - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; - bool transform(TransformationContext &context, ngraph::pattern::Matcher &m) const override; + NGRAPH_RTTI_DECLARATION; + MVNTransformation(const Params& params = Params()); + bool transform(TransformationContext &context, ngraph::pattern::Matcher &m) override; bool canBeTransformed(const TransformationContext& context, std::shared_ptr layer) const override; bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; }; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/network_helper.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/network_helper.hpp index 891b341b87f522..77218320dba376 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/network_helper.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/network_helper.hpp @@ -16,6 +16,10 @@ #include "ngraph_ops/type_relaxed.hpp" #include +#include "rt_info/shared_value_attribute.hpp" +#include "rt_info/precisions_attribute.hpp" +#include "rt_info/per_tensor_quantization_attribute.hpp" +#include "rt_info/intervals_alignment_attribute.hpp" #include "transformation_context.hpp" #include "quantization_details.hpp" #include "transformations/utils/utils.hpp" @@ -30,7 +34,7 @@ namespace low_precision { /** * @brief NetworkHelper class encapsulates manipulations with nGraph function. 
*/ -class TRANSFORMATIONS_API NetworkHelper { +class LP_TRANSFORMATIONS_API NetworkHelper { public: // Return true if `type` can be castable to at least one of `type` static bool is_castable_to_one_of(NodeTypeInfo type, const std::unordered_set& types); @@ -76,6 +80,10 @@ class TRANSFORMATIONS_API NetworkHelper { static std::shared_ptr swapMultiplyAndAdd(std::shared_ptr addAfterMultiply, const int multiplyBranch); + static void copyInfo(const std::vector>& sources, const std::vector>& targets); + + static void copyInfo(const std::vector>& sources, const std::shared_ptr& target); + static void copyInfo(const std::shared_ptr& source, const std::shared_ptr& target); static void cleanRunTimeInfo(const std::shared_ptr& layer); @@ -116,7 +124,8 @@ class TRANSFORMATIONS_API NetworkHelper { std::shared_ptr fq, element::Type precision, float min, - float max); + float max, + const bool replace = true); static FakeQuantizeDequantization makeDequantization( const float dequantizationMul, @@ -124,7 +133,8 @@ class TRANSFORMATIONS_API NetworkHelper { const ngraph::element::Type originalPrecision, const ngraph::PartialShape dataNodeOutputShape, element::Type precision, - const element::Type deqPrecision = element::f32); + const element::Type deqPrecision = element::f32, + std::shared_ptr input = nullptr); static FakeQuantizeDequantization createDequantizationFromFakeQuantize( std::shared_ptr fq, @@ -143,7 +153,7 @@ class TRANSFORMATIONS_API NetworkHelper { static FakeQuantizeDequantization getDequantization(const std::shared_ptr& node, const size_t parentIndex = 0ul, const bool inPlace = false); - static FakeQuantizeDequantization getDequantizationBelow(const std::shared_ptr& node); + static FakeQuantizeDequantization getDequantizationBelow(const std::shared_ptr& node, const bool convertIsMandatory = false); static FakeQuantizeDequantization normalizeDequantization(FakeQuantizeDequantization dequantization); @@ -200,6 +210,115 @@ class TRANSFORMATIONS_API NetworkHelper { static bool isDQByDynamicDimension(const std::shared_ptr& layer, size_t inputIdx = 0); + static bool isPrecisionPreserved(const std::shared_ptr& node); + + static void replaceAttributeInNodes( + std::shared_ptr f, + const std::string& name, + const std::shared_ptr newAttribute, + const std::shared_ptr oldAttribute, + const std::shared_ptr& initialNode) { + std::set> visited; + std::deque> nodes; + nodes.emplace_back(initialNode); + + while (!nodes.empty()) { + auto node = nodes.front(); + nodes.pop_front(); + + if (visited.count(node) || is_type(node)) { + continue; + } + + visited.insert(node); + + bool handleConnectedNodes = false; + if (NetworkHelper::isPrecisionPreserved(node) || is_type(node)) { + auto& rt = node->get_rt_info(); + + if (node == initialNode) { + rt[name] = newAttribute; + handleConnectedNodes = true; + } else { + auto it = rt.find(name); + if (it != rt.end()) { + const auto currentAttribute = it->second; + if (oldAttribute.get() == currentAttribute.get()) { + rt[name] = newAttribute; + } + handleConnectedNodes = true; + } + } + } + + if (!handleConnectedNodes) { + continue; + } + + if (!is_type(node)) { + for (size_t index = 0ul; index < node->get_input_size(); ++index) { + auto getInput = [](const std::shared_ptr& node, const size_t index) { + const auto dequantization = NetworkHelper::getDequantization(node, index); + if (!dequantization.empty() && + (is_type(dequantization.data.get_node())) && + is_type(dequantization.data.get_node()->get_input_node_ptr(0))) { + const auto input = 
dequantization.data.get_node()->input(0); + return input; + } + return node->input(index); + }; + + const auto& input = getInput(node, index); + const auto& input_node = input.get_source_output().get_node_shared_ptr(); + + if (visited.count(input_node) || is_type(input_node)) { + continue; + } + + nodes.push_front(input_node); + } + } + + for (auto& output : node->outputs()) { + for (auto& input_value : output.get_target_inputs()) { + const auto& output_node = input_value.get_node()->shared_from_this(); + if (visited.count(output_node) || is_type(output_node)) { + continue; + } + + nodes.push_front(output_node); + } + } + } + } + + template + static void reassign( + const std::shared_ptr& sharedValue, + const std::vector>& attributes) { + for (const auto attributeWeakPtr : attributes) { + auto attribute = attributeWeakPtr.lock(); + if (attribute == nullptr) { + continue; + } + attribute->sharedValue = sharedValue; + sharedValue->attributes.push_back(attribute); + } + } + + static size_t calculateLevels( + const float dataPrecisionMin, + const float dataPrecisionMax, + const float combinedIntervalLow, + const float combinedIntervalHigh, + const float minIntervalLow, + const float minIntervalHigh, + float& dequantizationMul, + float& dequantizationSub, + float& updatedOutputLowValue, + float& updatedOutputHighValue); + private: static std::shared_ptr foldFakeQuantize( const std::shared_ptr& fq, @@ -292,6 +411,54 @@ std::shared_ptr fold_reshape(Args&&... args) { return node; } +template +std::shared_ptr> getAttribute(const std::shared_ptr& inputNode) { + auto& rt = inputNode->get_rt_info(); + auto it = rt.find(ngraph::VariantWrapper::type_info.name); + if (it == rt.end()) { + return nullptr; + } + + auto attribute = std::dynamic_pointer_cast>(it->second); + assert(attribute != nullptr); + return attribute; +} + +template +std::shared_ptr> getAttribute(const Input& input) { + auto& rt = input.get_rt_info(); + auto it = rt.find(ngraph::VariantWrapper::type_info.name); + if (it == rt.end()) { + return nullptr; + } + + auto attribute = std::dynamic_pointer_cast>(it->second); + assert(attribute != nullptr); + return attribute; +} + +template +std::shared_ptr> getAttributeFromOutput(const Output& output) { + auto& rt = output.get_rt_info(); + auto it = rt.find(ngraph::VariantWrapper::type_info.name); + if (it == rt.end()) { + return nullptr; + } + + auto attribute = std::dynamic_pointer_cast>(it->second); + assert(attribute != nullptr); + return attribute; +} + +bool isDisabled(const std::shared_ptr& node); + +template +std::shared_ptr make_shared_attribute(Args&& ...
args) { + std::shared_ptr attribute = std::make_shared(std::forward(args)...); + attribute->sharedValue->attributes.push_back(attribute); + return attribute; +} + } // namespace low_precision } // namespace pass } // namespace ngraph diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/normalize_l2.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/normalize_l2.hpp index 9591a631e86a6b..88a113cb38a49d 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/normalize_l2.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/normalize_l2.hpp @@ -10,11 +10,11 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API NormalizeL2Transformation : public LayerTransformation { +class LP_TRANSFORMATIONS_API NormalizeL2Transformation : public LayerTransformation { public: - NormalizeL2Transformation(const Params& params) : LayerTransformation(params) {} - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; - bool transform(TransformationContext &context, ngraph::pattern::Matcher &m) const override; + NGRAPH_RTTI_DECLARATION; + NormalizeL2Transformation(const Params& params = Params()); + bool transform(TransformationContext &context, ngraph::pattern::Matcher &m) override; bool canBeTransformed(const TransformationContext& context, std::shared_ptr layer) const override; bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; }; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/prelu.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/prelu.hpp index ef767127315a60..e58d4b25615752 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/prelu.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/prelu.hpp @@ -12,12 +12,11 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API PReluTransformation : public LayerTransformation { +class LP_TRANSFORMATIONS_API PReluTransformation : public LayerTransformation { public: - PReluTransformation(const Params& params) : LayerTransformation(params) {} - ~PReluTransformation() override {} - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; - bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override; + NGRAPH_RTTI_DECLARATION; + PReluTransformation(const Params& params = Params()); + bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override; bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; bool canBeTransformed(const TransformationContext& context, std::shared_ptr op) const override; }; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/propagate_precisions.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/propagate_precisions.hpp new file mode 100644 index 00000000000000..5995b6473722dd --- /dev/null +++ b/inference-engine/src/low_precision_transformations/include/low_precision/propagate_precisions.hpp @@ -0,0 +1,29 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include +#include +#include +#include + +namespace ngraph { +namespace pass { +namespace low_precision { + +class LP_TRANSFORMATIONS_API PropagatePrecisions; + +} // namespace 
low_precision +} // namespace pass +} // namespace ngraph + +class ngraph::pass::low_precision::PropagatePrecisions : public ngraph::pass::FunctionPass { +public: + NGRAPH_RTTI_DECLARATION; + bool run_on_function(std::shared_ptr f) override; +}; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/propagate_shared_value.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/propagate_shared_value.hpp new file mode 100644 index 00000000000000..9866d63197ff1d --- /dev/null +++ b/inference-engine/src/low_precision_transformations/include/low_precision/propagate_shared_value.hpp @@ -0,0 +1,164 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include + +#include +#include + +#include +#include +#include "low_precision/network_helper.hpp" +#include "lpt_itt.hpp" + +namespace ngraph { +namespace pass { +namespace low_precision { + +template +class LP_TRANSFORMATIONS_API PropagateSharedValue; + +} // namespace low_precision +} // namespace pass +} // namespace ngraph + +template +class ngraph::pass::low_precision::PropagateSharedValue : public ngraph::pass::FunctionPass { +public: + bool run_on_function(std::shared_ptr f) override { + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::LPT_LT, "PropagateSharedValue"); + + std::vector> nodes(f->get_ordered_ops()); + for (auto it = nodes.begin(); it != nodes.end(); it++) { + const std::shared_ptr node = *it; + if (is_type(node)) { + assert(node->get_output_size() == 1ul); + auto& outputRtInfo = node->output(0).get_rt_info(); + + auto attribute = make_shared_attribute(std::set{element::u8, element::i8}); + + auto attributeWrapper = std::make_shared>>(attribute); + outputRtInfo[ngraph::VariantWrapper>::type_info.name] = attributeWrapper; + continue; + } + + if (!NetworkHelper::isPrecisionPreserved(node)) { + for (auto& input : node->inputs()) { + auto parentNode = input.get_source_output().get_node_shared_ptr(); + + auto getAttributes = [](const Input& nodeInput) { + const std::string name = ngraph::VariantWrapper>::type_info.name; + + auto node = nodeInput.get_source_output().get_node_shared_ptr(); + std::vector>>> attributes; + if (is_type(node)) { + // output + auto& rt = nodeInput.get_source_output().get_rt_info(); + auto it = rt.find(name); + if (it != rt.end()) { + const auto& attribute = std::dynamic_pointer_cast>>(it->second); + attributes.push_back(attribute); + } + } + + return attributes; + }; + + auto& nodeRt = input.get_rt_info(); + + const std::string name = ngraph::VariantWrapper>::type_info.name; + const auto it = nodeRt.find(name); + if (it == nodeRt.end()) { + continue; + } + + const auto& attribute = std::dynamic_pointer_cast>>(it->second); + std::vector>>> attributes{ attribute }; + + auto parentAttributes = getAttributes(input); + if (parentAttributes.empty()) { + continue; + } + + for (auto& parentAttribute : parentAttributes) { + parentAttribute->merge(attributes); + } + + nodeRt[name] = parentAttributes[0]; + } + continue; + } + + handle(f, node); + } + return true; + } + +private: + std::vector>>> getParentInputRestrictions( + const std::shared_ptr node) { + std::vector>>> parentAttributes; + for (size_t index = 0ul; index < node->get_input_size(); index++) { + const Input& input = node->input(index); + auto inputNode = input.get_source_output().get_node()->shared_from_this(); + + const auto dequantization = NetworkHelper::getDequantization(node, index); + if (!dequantization.empty() && + 
(is_type(dequantization.data.get_node())) && + is_type(dequantization.data.get_node()->get_input_node_ptr(0))) { + inputNode = dequantization.data.get_node()->get_input_node_shared_ptr(0); + } + + if (NetworkHelper::isPrecisionPreserved(inputNode)) { + auto& inputRtInfo = inputNode->get_rt_info(); + auto inputAttributeIt = inputRtInfo.find(ngraph::VariantWrapper>::type_info.name); + if (inputAttributeIt != inputRtInfo.end()) { + const auto attribute = std::dynamic_pointer_cast>>(inputAttributeIt->second); + parentAttributes.push_back(attribute); + } + } else if (is_type(inputNode)) { + const auto& outputPortRtInfo = inputNode->outputs()[0].get_rt_info(); + auto attributeIt = outputPortRtInfo.find(ngraph::VariantWrapper>::type_info.name); + if (attributeIt != outputPortRtInfo.end()) { + const auto attribute = std::dynamic_pointer_cast>>(attributeIt->second); + parentAttributes.push_back(attribute); + } + } + } + return parentAttributes; + } + + void handle(std::shared_ptr f, const std::shared_ptr& node) { + const bool precisionPreserved = NetworkHelper::isPrecisionPreserved(node); + if (precisionPreserved) { + const auto parentRestrictions = getParentInputRestrictions(node); + if (parentRestrictions.empty()) { + return; + } + + // one operation - one output precision + // merge parent inputs to one current output + auto resultAttribute = parentRestrictions[0]; + + std::vector>>> toMerge = parentRestrictions; + toMerge.erase(toMerge.begin()); + resultAttribute->merge(toMerge); + + for (size_t index = 1ul; index < parentRestrictions.size(); index++) { + const auto oldAttribute = parentRestrictions[index]->get(); + NetworkHelper::reassign( + resultAttribute->get()->sharedValue, + parentRestrictions[index]->get()->sharedValue->attributes); + } + + auto& rt = node->get_rt_info(); + rt[ngraph::VariantWrapper>::type_info.name] = resultAttribute; + } + } +}; + diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/propagate_through_precision_preserved.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/propagate_through_precision_preserved.hpp new file mode 100644 index 00000000000000..18a8f1e0ab839b --- /dev/null +++ b/inference-engine/src/low_precision_transformations/include/low_precision/propagate_through_precision_preserved.hpp @@ -0,0 +1,118 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include +#include +#include +#include + +#include "low_precision/lpt_visibility.hpp" +#include "low_precision/network_helper.hpp" +#include "low_precision/lpt_itt.hpp" + +namespace ngraph { +namespace pass { +namespace low_precision { + +template +class PropagateThroughPrecisionPreserved; + +} // namespace low_precision +} // namespace pass +} // namespace ngraph + +template +class ngraph::pass::low_precision::PropagateThroughPrecisionPreserved : public ngraph::pass::MatcherPass { +public: + PropagateThroughPrecisionPreserved() { + ngraph::graph_rewrite_callback callback = [&](pattern::Matcher& m) { + auto node = m.get_match_root(); + if (transformation_callback(node)) { + return false; + } + + { + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::LPT_LT, "PropagateThroughPrecisionPreserved"); + + if (!ngraph::pass::low_precision::NetworkHelper::isPrecisionPreserved(node)) { + return false; + } + + const auto parentRestrictions = getParentInputRestrictions(node); + if (parentRestrictions.empty()) { + return false; + } + + auto resultAttribute = parentRestrictions[0]; + + 
std::vector>>> toMerge = parentRestrictions; + // TODO: LPT: handle pointer on itself in VariantWrapper::merge and remove erase, task #59498 + toMerge.erase(toMerge.begin()); + resultAttribute->merge(toMerge); + + for (size_t index = 1ul; index < parentRestrictions.size(); index++) { + const auto attributes = parentRestrictions[index]->get()->sharedValue->attributes; + for (const auto attributeWeakPtr : attributes) { + auto attribute = attributeWeakPtr.lock(); + if (attribute == nullptr) { + continue; + } + attribute->sharedValue = resultAttribute->get()->sharedValue; + resultAttribute->get()->sharedValue->attributes.push_back(attribute); + } + } + + auto &rt = node->get_rt_info(); + rt[ngraph::VariantWrapper>::type_info.name] = resultAttribute; + } + return true; + }; + + auto matcher = std::make_shared(pattern::any_input(), "PropagateThroughPrecisionPreserved"); + this->register_matcher(matcher, callback); + } + +private: + std::shared_ptr>> getSourceOutputAttribute(const Input& input) { + auto input2 = input; + auto output = input2.get_source_output(); + std::shared_ptr>> attribute = getAttributeFromOutput>(output); + if (attribute == nullptr) { + attribute = getAttribute>(output.get_node_shared_ptr()); + } + return attribute; + } + + // TODO: possible duplicate: PropagateToInput::getSourceOutputAttribute + std::vector>>> getParentInputRestrictions( + const std::shared_ptr node) { + std::vector>>> parentAttributes; + auto getInput = [](const std::shared_ptr& node, const size_t index) -> Input { + const auto dequantization = NetworkHelper::getDequantization(node, index); + if (!dequantization.empty() && + is_type(dequantization.data.get_node()) && + (dequantization.data.get_node()->get_input_size() == 1ul) && + is_type(dequantization.data.get_node()->get_input_node_ptr(0))) { + return dequantization.data.get_node()->input(0); + } + + return node->input(index); + }; + + for (size_t index = 0ul; index < node->get_input_size(); index++) { + const Input& input = getInput(node, index); + const auto attribute = getSourceOutputAttribute(input); + if (attribute != nullptr) { + parentAttributes.push_back(attribute); + } + } + + return parentAttributes; + } +}; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/propagate_to_input.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/propagate_to_input.hpp new file mode 100644 index 00000000000000..1f30ab7b4a07d5 --- /dev/null +++ b/inference-engine/src/low_precision_transformations/include/low_precision/propagate_to_input.hpp @@ -0,0 +1,105 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include +#include +#include + +#include +#include +#include "network_helper.hpp" + +namespace ngraph { +namespace pass { +namespace low_precision { + +template +class PropagateToInput; + +} // namespace low_precision +} // namespace pass +} // namespace ngraph + +template +class ngraph::pass::low_precision::PropagateToInput : public ngraph::pass::MatcherPass { +public: + PropagateToInput() { + ngraph::graph_rewrite_callback callback = [&](pattern::Matcher& m) { + auto node = m.get_match_root(); + if (transformation_callback(node)) { + return false; + } + + { + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::LPT_LT, "PropagateToInput"); + + for (auto input : node->inputs()) { + auto parentAttribute = getSourceOutputAttribute(input); + if (parentAttribute == nullptr) { + continue; + } + + auto attribute = getAttribute>(input); + 
if (attribute != nullptr) { + if ((attribute->get()->sharedValue != nullptr) && (attribute->get()->sharedValue->precisions.empty())) { + return false; + } + + std::vector>>> attributes = { attribute }; + parentAttribute->merge(attributes); + } + + auto& rt = input.get_rt_info(); + rt[ngraph::VariantWrapper>::type_info.name] = parentAttribute; + } + } + return true; + }; + + auto matcher = std::make_shared(pattern::any_input(), "PropagateToInput"); + this->register_matcher(matcher, callback); + } + +private: + // TODO: possible duplicate: PropagateThroughPrecisionPreserved::getParentInputRestrictions + std::shared_ptr>> getSourceOutputAttribute(const Input& input) { + auto getInput = [](const Input& input) { + const auto dequantization = NetworkHelper::getDequantization(input.get_node()->shared_from_this(), input.get_index()); + if (!dequantization.empty() && + is_type(dequantization.data.get_node()) && + (dequantization.data.get_node()->get_input_size() == 1ul) && + is_type(dequantization.data.get_node()->get_input_node_ptr(0))) { + return dequantization.data.get_node()->input(0); + } + + return input; + }; + + auto input2 = getInput(input); + auto output = input2.get_source_output(); + std::shared_ptr>> attribute = getAttributeFromOutput>(output); + if (attribute == nullptr) { + attribute = getAttribute>(output.get_node_shared_ptr()); + } + return attribute; + } + + std::vector>>> getParentInputRestrictions( + const std::shared_ptr node) { + std::vector>>> parentAttributes; + for (size_t index = 0ul; index < node->get_input_size(); index++) { + const Input& input = node->input(index); + const auto attribute = getSourceOutputAttribute(input); + if (attribute != nullptr) { + parentAttributes.push_back(attribute); + } + } + return parentAttributes; + } +}; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/pull_reshape_through_dequantization.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/pull_reshape_through_dequantization.hpp index 639e1a00e65c74..e8bc2add659a39 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/pull_reshape_through_dequantization.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/pull_reshape_through_dequantization.hpp @@ -6,14 +6,14 @@ #include #include -#include +#include #include namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API PullReshapeThroughDequantization; +class LP_TRANSFORMATIONS_API PullReshapeThroughDequantization; } // namespace low_precision } // namespace pass diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/pull_transpose_through_dequantization.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/pull_transpose_through_dequantization.hpp index 3f1648841220b3..f9d957389e6e5a 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/pull_transpose_through_dequantization.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/pull_transpose_through_dequantization.hpp @@ -6,14 +6,14 @@ #include #include -#include +#include #include namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API PullTransposeThroughDequantization; +class LP_TRANSFORMATIONS_API PullTransposeThroughDequantization; } // namespace low_precision } // namespace pass diff --git
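The three propagation helpers above (PropagateSharedValue, PropagateThroughPrecisionPreserved, PropagateToInput) all store their merged result in rt_info keyed by VariantWrapper<std::shared_ptr<PrecisionsAttribute>>::type_info.name; the attribute type name is inferred from the rt_info/precisions_attribute.hpp include, since the wrappers' template arguments were stripped in this listing. A sketch of reading the merged precision set back from a node through the network_helper.hpp accessors:

#include <iostream>
#include <low_precision/network_helper.hpp>

void printPropagatedPrecisions(const std::shared_ptr<ngraph::Node>& node) {
    using namespace ngraph::pass::low_precision;
    const auto attribute = getAttribute<std::shared_ptr<PrecisionsAttribute>>(node);
    if (attribute == nullptr) {
        return;  // nothing was propagated to this node
    }
    // sharedValue->precisions holds the merged set, as PropagateToInput checks above
    for (const auto& precision : attribute->get()->sharedValue->precisions) {
        std::cout << precision << std::endl;
    }
}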
a/inference-engine/src/low_precision_transformations/include/low_precision/quantization_details.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/quantization_details.hpp index 1e4b05fce2812b..a1c2f1ca4976b1 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/quantization_details.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/quantization_details.hpp @@ -8,7 +8,7 @@ #include #include -#include +#include #include #include @@ -18,7 +18,7 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API QuantizationDetails { +class LP_TRANSFORMATIONS_API QuantizationDetails { public: QuantizationDetails(); QuantizationDetails(const QuantizationDetails& quantizationDetails); @@ -27,33 +27,25 @@ class TRANSFORMATIONS_API QuantizationDetails { const std::vector& inputLowValues, const std::vector& inputHighValues, const std::vector& outputLowValues, - const std::vector& outputHighValues, - const size_t inputIntervalsCount, - const size_t outputIntervalsCount, - const size_t outputChannelsCount); + const std::vector& outputHighValues); static bool outputLayoutIsSupported(std::shared_ptr quantize); static void getInputIntervals( std::shared_ptr quantize, std::vector& inputLowValues, - std::vector& inputHighValues, - size_t& inputIntervalsCount); + std::vector& inputHighValues); static void getOutputIntervals( std::shared_ptr quantize, std::vector& outputLowValues, - std::vector& outputHighValues, - size_t& outputIntervalsCount); + std::vector& outputHighValues); static QuantizationDetails getDetails(std::shared_ptr); bool hasNegativeOutput() const; float maxOutput(const size_t channel) const; float maxInput(const size_t channel) const; - float maxOutputHigh() const; - float minOutputLow() const; - float getInputLowValue(const size_t channel) const; float getInputHighValue(const size_t channel) const; float getOutputLowValue(const size_t channel) const; @@ -66,19 +58,15 @@ class TRANSFORMATIONS_API QuantizationDetails { const std::vector inputHighValues; const std::vector outputLowValues; const std::vector outputHighValues; - const size_t inputIntervalsCount; - const size_t outputIntervalsCount; - const size_t outputChannelsCount; private: - static void validate(std::shared_ptr constantLayer); static std::vector getBlobValue(std::shared_ptr constantLayer); }; inline std::ostream &operator << (std::ostream &os, const QuantizationDetails& value) { os << "levels: " << value.levels << - ", input 1/" << value.inputIntervalsCount << ": [" << value.getInputLowValue(0) << " : " << value.getInputHighValue(0) << "], " << - ", output 1/" << value.outputIntervalsCount << ": [" << value.getOutputLowValue(0) << " : " << value.getOutputHighValue(0) << "]"; + ", input 1/" << value.inputLowValues.size() << ": [" << value.getInputLowValue(0) << " : " << value.getInputHighValue(0) << "], " << + "output 1/" << value.outputLowValues.size() << ": [" << value.getOutputLowValue(0) << " : " << value.getOutputHighValue(0) << "]"; return os; } diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/reduce_base_transformation.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/reduce_base_transformation.hpp index 679a8d0f61d6db..0b9782e4eb207a 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/reduce_base_transformation.hpp +++ 
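With the interval-count fields gone, the counts now fall out of the vector sizes, which is exactly what the reworked operator<< above prints. A small usage sketch; the parameter type of getDetails, erased by formatting, is presumed to be std::shared_ptr<opset1::FakeQuantize>:

#include <iostream>
#include <ngraph/opsets/opset1.hpp>
#include <low_precision/quantization_details.hpp>

void logQuantization(const std::shared_ptr<ngraph::opset1::FakeQuantize>& fq) {
    using ngraph::pass::low_precision::QuantizationDetails;
    const QuantizationDetails details = QuantizationDetails::getDetails(fq);
    std::cout << details << std::endl;  // levels plus the first input/output intervals
}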
b/inference-engine/src/low_precision_transformations/include/low_precision/reduce_base_transformation.hpp @@ -19,10 +19,10 @@ namespace low_precision { * */ -class TRANSFORMATIONS_API ReduceBaseTransformation : public LayerTransformation { +class LP_TRANSFORMATIONS_API ReduceBaseTransformation : public LayerTransformation { public: - ReduceBaseTransformation(const Params& params); - bool transform(TransformationContext& context, ngraph::pattern::Matcher& m) const override; + ReduceBaseTransformation(const Params& params = Params()); + bool transform(TransformationContext& context, ngraph::pattern::Matcher& m) override; bool canBeTransformed(const TransformationContext& context, std::shared_ptr reduce) const override; protected: diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/reduce_max.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/reduce_max.hpp index 453f48dfeca48b..b9c2b98253ef82 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/reduce_max.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/reduce_max.hpp @@ -14,11 +14,11 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API ReduceMaxTransformation : public ReduceBaseTransformation { +class LP_TRANSFORMATIONS_API ReduceMaxTransformation : public ReduceBaseTransformation { public: - ReduceMaxTransformation(const Params& params); + NGRAPH_RTTI_DECLARATION; + ReduceMaxTransformation(const Params& params = Params()); bool isPrecisionPreserved(std::shared_ptr reduce) const noexcept override; - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; bool canBeTransformed(const TransformationContext& context, std::shared_ptr reduce) const override; protected: diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/reduce_mean.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/reduce_mean.hpp index 8f62c34cc0cec0..31f542a37548b2 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/reduce_mean.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/reduce_mean.hpp @@ -14,11 +14,11 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API ReduceMeanTransformation : public ReduceBaseTransformation { +class LP_TRANSFORMATIONS_API ReduceMeanTransformation : public ReduceBaseTransformation { public: - ReduceMeanTransformation(const Params& params); + NGRAPH_RTTI_DECLARATION; + ReduceMeanTransformation(const Params& params = Params()); bool isPrecisionPreserved(std::shared_ptr reduce) const noexcept override; - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; bool canBeTransformed(const TransformationContext& context, std::shared_ptr reduce) const override; protected: diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/reduce_min.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/reduce_min.hpp index 2545af1e9febd7..e4ccdeab97e74a 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/reduce_min.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/reduce_min.hpp @@ -14,11 +14,11 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API ReduceMinTransformation : public ReduceBaseTransformation { +class 
 public:
-    ReduceMinTransformation(const Params& params);
+    NGRAPH_RTTI_DECLARATION;
+    ReduceMinTransformation(const Params& params = Params());
     bool isPrecisionPreserved(std::shared_ptr<Node> reduce) const noexcept override;
-    void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
     bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> reduce) const override;

 protected:
diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/reduce_sum.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/reduce_sum.hpp
index ae7f07efe6bc65..5053545fbff5bb 100644
--- a/inference-engine/src/low_precision_transformations/include/low_precision/reduce_sum.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/reduce_sum.hpp
@@ -14,11 +14,11 @@ namespace ngraph {
 namespace pass {
 namespace low_precision {

-class TRANSFORMATIONS_API ReduceSumTransformation : public ReduceBaseTransformation {
+class LP_TRANSFORMATIONS_API ReduceSumTransformation : public ReduceBaseTransformation {
 public:
+    NGRAPH_RTTI_DECLARATION;
     ReduceSumTransformation(const Params& params);
     bool isPrecisionPreserved(std::shared_ptr<Node> reduce) const noexcept override;
-    void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
     bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> reduce) const override;

 protected:
diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/relu.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/relu.hpp
index 734a42273c50c3..1f7489a73d8337 100644
--- a/inference-engine/src/low_precision_transformations/include/low_precision/relu.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/relu.hpp
@@ -12,12 +12,11 @@ namespace ngraph {
 namespace pass {
 namespace low_precision {

-class TRANSFORMATIONS_API ReluTransformation : public LayerTransformation {
+class LP_TRANSFORMATIONS_API ReluTransformation : public LayerTransformation {
 public:
-    ReluTransformation(const Params& params) : LayerTransformation(params) {}
-    ~ReluTransformation() override {}
-    void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
-    bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override;
+    NGRAPH_RTTI_DECLARATION;
+    ReluTransformation(const Params& params = Params());
+    bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
     bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
     bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> op) const override;
 };
diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/reshape.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/reshape.hpp
index 290e028dc5f3e9..cb1b3a28456f03 100644
--- a/inference-engine/src/low_precision_transformations/include/low_precision/reshape.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/reshape.hpp
@@ -11,12 +11,11 @@ namespace ngraph {
 namespace pass {
 namespace low_precision {

-class TRANSFORMATIONS_API ReshapeTransformation : public LayerTransformation {
+class LP_TRANSFORMATIONS_API ReshapeTransformation : public LayerTransformation {
 public:
-    ReshapeTransformation(const Params& params) : LayerTransformation(params) {}
-    ~ReshapeTransformation() override {}
-    void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override;
-    bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override;
+    NGRAPH_RTTI_DECLARATION;
+    ReshapeTransformation(const Params& params = Params());
+    bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override;
     bool isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept override;
     bool canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> op) const override;
diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/rt_info/attribute_parameters.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/rt_info/attribute_parameters.hpp
new file mode 100644
index 00000000000000..6789bc73ae564f
--- /dev/null
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/rt_info/attribute_parameters.hpp
@@ -0,0 +1,14 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include
+#include "low_precision/lpt_visibility.hpp"
+
+class LP_TRANSFORMATIONS_API AttributeParameters {
+public:
+    AttributeParameters(const ngraph::element::Type deqPrecision = ngraph::element::f32) : deqPrecision(deqPrecision) {}
+    ngraph::element::Type deqPrecision;
+};
diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/rt_info/avg_pool_precision_preserved_attribute.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/rt_info/avg_pool_precision_preserved_attribute.hpp
new file mode 100644
index 00000000000000..b8aabf3718db4b
--- /dev/null
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/rt_info/avg_pool_precision_preserved_attribute.hpp
@@ -0,0 +1,39 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include "low_precision/lpt_visibility.hpp"
+#include "low_precision/rt_info/precision_preserved_attribute.hpp"
+
+namespace ngraph {
+
+class LP_TRANSFORMATIONS_API AvgPoolPrecisionPreservedAttribute : public PrecisionPreservedAttribute {
+};
+
+using AvgPoolPrecisionPreservedAttributePtr = std::shared_ptr<AvgPoolPrecisionPreservedAttribute>;
+
+extern template class LP_TRANSFORMATIONS_API VariantImpl<AvgPoolPrecisionPreservedAttributePtr>;
+
+template<>
+class LP_TRANSFORMATIONS_API VariantWrapper<AvgPoolPrecisionPreservedAttributePtr> : public VariantImpl<AvgPoolPrecisionPreservedAttributePtr> {
+public:
+    static constexpr VariantTypeInfo type_info{ "LowPrecision::AvgPoolPrecisionPreserved", 0 };
+
+    const VariantTypeInfo& get_type_info() const override {
+        return type_info;
+    }
+
+    VariantWrapper(const value_type& value) : VariantImpl<value_type>(value) {}
+
+    AvgPoolPrecisionPreservedAttributePtr get() { return this->m_value; }
+
+    void merge(std::vector<std::shared_ptr<ngraph::VariantWrapper<std::shared_ptr<AvgPoolPrecisionPreservedAttribute>>>>& attributes);
+    std::string to_string() override;
+};
+} // namespace ngraph
diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/rt_info/intervals_alignment_attribute.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/rt_info/intervals_alignment_attribute.hpp
new file mode 100644
index 00000000000000..3c723a444055c4
--- /dev/null
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/rt_info/intervals_alignment_attribute.hpp
@@ -0,0 +1,88 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include
+#include
+
+#include
+#include
+
"low_precision/rt_info/shared_value_attribute.hpp" +#include "low_precision/rt_info/attribute_parameters.hpp" +#include "low_precision/lpt_visibility.hpp" + +namespace ngraph { +class IntervalsAlignmentAttribute; + +class LP_TRANSFORMATIONS_API IntervalsAlignmentSharedValue : public SharedValue { +public: + class Interval { + public: + Interval() = default; + Interval(const float low, const float high) : low(low), high(high) {} + float low; + float high; + }; + + IntervalsAlignmentSharedValue() = default; + IntervalsAlignmentSharedValue( + const Interval& combinedInterval, + const Interval& minInterval, + const size_t minLevels) : + combinedInterval(combinedInterval), + minInterval(minInterval), + minLevels(minLevels) {} + + Interval combinedInterval; + Interval minInterval; + size_t minLevels; + // preferable precisions which are preferred by affected quantization operations to avoid zero points + std::set preferablePrecisions; + +#ifdef LPT_DEBUG + std::string minLevelsOperation; +#endif +}; + +class LP_TRANSFORMATIONS_API IntervalsAlignmentAttribute : public SharedValueAttribute { +public: + IntervalsAlignmentAttribute() = default; + IntervalsAlignmentAttribute(IntervalsAlignmentSharedValue::Interval combinedInterval, size_t levels); + IntervalsAlignmentAttribute( + const IntervalsAlignmentSharedValue::Interval combinedInterval, + const size_t levels, + const IntervalsAlignmentSharedValue::Interval minInterval, + const size_t minLevels); + + // specify subgraph original levels + size_t levels; +}; + +using IntervalsAlignmentAttributePtr = std::shared_ptr; + +extern template class LP_TRANSFORMATIONS_API ngraph::VariantImpl; + +template<> +class LP_TRANSFORMATIONS_API VariantWrapper> : + public VariantImpl> { +public: + static constexpr VariantTypeInfo type_info{ "LowPrecision::IntervalsAlignment", 0 }; + + const VariantTypeInfo& get_type_info() const override { + return type_info; + } + + VariantWrapper(const value_type& value) : VariantImpl(value) {} + + std::shared_ptr get() const { return this->m_value; } + + static std::shared_ptr>> create( + const std::shared_ptr& node, + const AttributeParameters& params); + void merge(std::vector>>>& attributes); + std::string to_string() override; +}; +} // namespace ngraph diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/rt_info/per_tensor_quantization_attribute.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/rt_info/per_tensor_quantization_attribute.hpp new file mode 100644 index 00000000000000..1001df8bffeaf7 --- /dev/null +++ b/inference-engine/src/low_precision_transformations/include/low_precision/rt_info/per_tensor_quantization_attribute.hpp @@ -0,0 +1,33 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include +#include +#include "low_precision/rt_info/shared_value_attribute.hpp" +#include "low_precision/layer_transformation.hpp" +#include "attribute_parameters.hpp" + +namespace ngraph { +class LP_TRANSFORMATIONS_API PerTensorQuantizationAttribute { +}; + +extern template class LP_TRANSFORMATIONS_API ngraph::VariantImpl; + +template<> +class LP_TRANSFORMATIONS_API VariantWrapper : public VariantImpl { +public: + static constexpr VariantTypeInfo type_info { "LowPrecision::PerTensorQuantization", 0 }; + + VariantWrapper(const value_type& value) : VariantImpl(value) {} + + const VariantTypeInfo& get_type_info() const override { + return type_info; + } +}; +} // namespace ngraph diff --git 
diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/rt_info/precision_preserved_attribute.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/rt_info/precision_preserved_attribute.hpp
new file mode 100644
index 00000000000000..bf109407d008e9
--- /dev/null
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/rt_info/precision_preserved_attribute.hpp
@@ -0,0 +1,51 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include
+#include
+
+#include
+#include
+#include "low_precision/lpt_visibility.hpp"
+#include "low_precision/rt_info/shared_value_attribute.hpp"
+
+namespace ngraph {
+
+class LP_TRANSFORMATIONS_API PrecisionPreservedAttribute;
+
+class LP_TRANSFORMATIONS_API PrecisionPreservedSharedValue : public SharedValue<PrecisionPreservedAttribute> {
+public:
+    PrecisionPreservedSharedValue() = default;
+    PrecisionPreservedSharedValue(const bool value) : value(value) {}
+    bool value;
+};
+
+class LP_TRANSFORMATIONS_API PrecisionPreservedAttribute : public SharedValueAttribute<PrecisionPreservedSharedValue> {
+public:
+    PrecisionPreservedAttribute() = default;
+    PrecisionPreservedAttribute(const bool value);
+};
+
+using PrecisionPreservedAttributePtr = std::shared_ptr<PrecisionPreservedAttribute>;
+
+extern template class LP_TRANSFORMATIONS_API ngraph::VariantImpl<PrecisionPreservedAttributePtr>;
+
+template<>
+class LP_TRANSFORMATIONS_API VariantWrapper<PrecisionPreservedAttributePtr> : public VariantImpl<PrecisionPreservedAttributePtr> {
+public:
+    static constexpr VariantTypeInfo type_info{ "LowPrecision::PrecisionPreserved", 0 };
+
+    const VariantTypeInfo& get_type_info() const override {
+        return type_info;
+    }
+
+    VariantWrapper(const value_type& value) : VariantImpl<value_type>(value) {}
+
+    PrecisionPreservedAttributePtr get() { return this->m_value; }
+
+    std::string to_string() override;
+};
+} // namespace ngraph
diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/rt_info/precisions_attribute.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/rt_info/precisions_attribute.hpp
new file mode 100644
index 00000000000000..5fc08c17926a98
--- /dev/null
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/rt_info/precisions_attribute.hpp
@@ -0,0 +1,64 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+#include "low_precision/layer_transformation.hpp"
+#include "low_precision/lpt_visibility.hpp"
+#include "low_precision/rt_info/attribute_parameters.hpp"
+#include "low_precision/rt_info/shared_value_attribute.hpp"
+
+namespace ngraph {
+
+class PrecisionsAttribute;
+
+class LP_TRANSFORMATIONS_API PrecisionsSharedValue : public SharedValue<PrecisionsAttribute> {
+public:
+    std::vector<element::Type> precisions;
+};
+
+using PrecisionsAttributePtr = std::shared_ptr<PrecisionsAttribute>;
+
+class LP_TRANSFORMATIONS_API PrecisionsAttribute : public SharedValueAttribute<PrecisionsSharedValue> {
+public:
+    static const std::vector<element::Type> defaultPrecisions;
+    PrecisionsAttribute(const std::vector<element::Type>& precisions = defaultPrecisions);
+};
+
+extern template class LP_TRANSFORMATIONS_API ngraph::VariantImpl<std::shared_ptr<PrecisionsAttribute>>;
+
+template<>
+class LP_TRANSFORMATIONS_API VariantWrapper<std::shared_ptr<PrecisionsAttribute>> : public VariantImpl<std::shared_ptr<PrecisionsAttribute>> {
+public:
+    static constexpr VariantTypeInfo type_info{ "LowPrecision::Precisions", 0 };
+
+    const VariantTypeInfo& get_type_info() const override {
+        return type_info;
+    }
+
+    VariantWrapper(const value_type& value) : VariantImpl<value_type>(value) {}
+
+    std::shared_ptr<Variant> init(const std::shared_ptr<ngraph::Node>& node) override;
+
+    std::shared_ptr<PrecisionsAttribute> get() { return this->m_value; }
+
+    // create attribute instance for node
+    static std::shared_ptr<VariantWrapper<std::shared_ptr<PrecisionsAttribute>>> create(
+        const std::shared_ptr<ngraph::Node>& node,
+        const AttributeParameters& params);
+    // merge attribute instances which can be obtained from different sources: node, input port or output port
+    void merge(std::vector<std::shared_ptr<VariantWrapper<std::shared_ptr<PrecisionsAttribute>>>>& attributes);
+    // visualize shared attribute details in the VisualizeTree pass
+    std::string to_string() override;
+};
+} // namespace ngraph
diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/rt_info/quantization_alignment_attribute.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/rt_info/quantization_alignment_attribute.hpp
new file mode 100644
index 00000000000000..198301a9c4aef2
--- /dev/null
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/rt_info/quantization_alignment_attribute.hpp
@@ -0,0 +1,60 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#include
+#include
+#include "shared_value_attribute.hpp"
+#include "attribute_parameters.hpp"
+
+namespace ngraph {
+class QuantizationAlignmentAttribute;
+
+class LP_TRANSFORMATIONS_API QuantizationAlignmentSharedValue : public SharedValue<QuantizationAlignmentAttribute> {
+public:
+    QuantizationAlignmentSharedValue(const bool value = false) : value(value) {}
+    bool value;
+};
+
+class LP_TRANSFORMATIONS_API QuantizationAlignmentAttribute : public SharedValueAttribute<QuantizationAlignmentSharedValue> {
+public:
+    QuantizationAlignmentAttribute(const bool value = false);
+};
+
+using QuantizationAlignmentAttributePtr = std::shared_ptr<QuantizationAlignmentAttribute>;
+
+extern template class LP_TRANSFORMATIONS_API ngraph::VariantImpl<QuantizationAlignmentAttributePtr>;
+
+template<>
+class LP_TRANSFORMATIONS_API VariantWrapper<std::shared_ptr<QuantizationAlignmentAttribute>> :
+    public VariantImpl<std::shared_ptr<QuantizationAlignmentAttribute>> {
+public:
+    static constexpr VariantTypeInfo type_info{ "LowPrecision::QuantizationAlignment", 0 };
+
+    const VariantTypeInfo& get_type_info() const override {
+        return type_info;
+    }
+
+    VariantWrapper(const value_type& value) : VariantImpl<value_type>(value) {}
+
+    std::shared_ptr<Variant> init(const std::shared_ptr<ngraph::Node>& node) override;
+
+    std::shared_ptr<QuantizationAlignmentAttribute> get() { return this->m_value; }
+
+    static std::shared_ptr<VariantWrapper<std::shared_ptr<QuantizationAlignmentAttribute>>> create(
+        const std::shared_ptr<ngraph::Node>& node,
+        const AttributeParameters& params);
+    void merge(std::vector<std::shared_ptr<VariantWrapper<std::shared_ptr<QuantizationAlignmentAttribute>>>>& attributes);
+    std::string to_string() override;
+};
+} // namespace ngraph
diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/rt_info/shared_value_attribute.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/rt_info/shared_value_attribute.hpp
new file mode 100644
index 00000000000000..706ff46d590fa6
--- /dev/null
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/rt_info/shared_value_attribute.hpp
@@ -0,0 +1,59 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include
+#include
+#include
+
+#include
+#include
+
+#include
+#include
+
+template <class T>
+class LP_TRANSFORMATIONS_API SharedValue;
+
+template <class T>
+class LP_TRANSFORMATIONS_API SharedValueAttribute {
+public:
+    SharedValueAttribute() : sharedValue(std::make_shared<T>()) {}
+    virtual ~SharedValueAttribute() = default;
+    std::shared_ptr<T> sharedValue;
+    std::string get_string() {
+        std::stringstream ss;
+
+        const size_t rawPointer = (size_t)this;
+        ss << rawPointer << ": ";
+
+        const size_t sharedValueRawPointer = (size_t)sharedValue.get();
+        ss << "sharedValue: " << sharedValueRawPointer;
+
+        bool firstAttribute = true;
+        ss << ", attributes: {";
+        for
(auto& attributeWeakPtr : sharedValue->attributes) { + auto attribute = attributeWeakPtr.lock(); + if (attribute == nullptr) { + continue; + } + + if (!firstAttribute) { + ss << ", "; + } + ss << (size_t)attribute.get(); + firstAttribute = false; + } + ss << "}, "; + return ss.str(); + } +}; + +template +class LP_TRANSFORMATIONS_API SharedValue { +public: + virtual ~SharedValue() = default; + std::vector> attributes; +}; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/shuffle_channels.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/shuffle_channels.hpp index 42124d4b7b101b..ab28d754598e67 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/shuffle_channels.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/shuffle_channels.hpp @@ -11,11 +11,11 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API ShuffleChannelsTransformation : public LayerTransformation { +class LP_TRANSFORMATIONS_API ShuffleChannelsTransformation : public LayerTransformation { public: - ShuffleChannelsTransformation(const Params& params); - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; - bool transform(TransformationContext& context, ngraph::pattern::Matcher& m) const override; + NGRAPH_RTTI_DECLARATION; + ShuffleChannelsTransformation(const Params& params = Params()); + bool transform(TransformationContext& context, ngraph::pattern::Matcher& m) override; bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; bool canBeTransformed(const TransformationContext& context, std::shared_ptr op) const override; }; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/split.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/split.hpp index 5a9fbc48ce7916..d4f2c72b8beb7b 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/split.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/split.hpp @@ -13,11 +13,11 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API SplitTransformation : public LayerTransformation { +class LP_TRANSFORMATIONS_API SplitTransformation : public LayerTransformation { public: - SplitTransformation(const Params& params); - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; - bool transform(TransformationContext& context, ngraph::pattern::Matcher& m) const override; + NGRAPH_RTTI_DECLARATION; + SplitTransformation(const Params& params = Params()); + bool transform(TransformationContext& context, ngraph::pattern::Matcher& m) override; bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; bool canBeTransformed(const TransformationContext& context, std::shared_ptr layer) const override; void updateOutputs( diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/squeeze.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/squeeze.hpp index df4d3576a2b68d..fab050564c8bc0 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/squeeze.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/squeeze.hpp @@ -11,11 +11,11 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API SqueezeTransformation : public 
LayerTransformation { +class LP_TRANSFORMATIONS_API SqueezeTransformation : public LayerTransformation { public: - SqueezeTransformation(const Params& params); - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; - bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override; + NGRAPH_RTTI_DECLARATION; + SqueezeTransformation(const Params& params = Params()); + bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override; bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; bool canBeTransformed(const TransformationContext& context, std::shared_ptr layer) const override; }; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/strided_slice.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/strided_slice.hpp index 2228020d45988c..5a0520f54ae9b1 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/strided_slice.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/strided_slice.hpp @@ -12,11 +12,11 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API StridedSliceTransformation : public LayerTransformation { +class LP_TRANSFORMATIONS_API StridedSliceTransformation : public LayerTransformation { public: - StridedSliceTransformation(const Params& params); - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; - bool transform(TransformationContext& context, ngraph::pattern::Matcher& m) const override; + NGRAPH_RTTI_DECLARATION; + StridedSliceTransformation(const Params& params = Params()); + bool transform(TransformationContext& context, ngraph::pattern::Matcher& m) override; bool canBeTransformed(const TransformationContext& context, std::shared_ptr op) const override; bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; }; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/subtract.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/subtract.hpp index e0beb34946ae88..56c66d9945040b 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/subtract.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/subtract.hpp @@ -11,12 +11,11 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API SubtractTransformation : public LayerTransformation { +class LP_TRANSFORMATIONS_API SubtractTransformation : public LayerTransformation { public: - SubtractTransformation(const Params& params) : LayerTransformation(params) {} - ~SubtractTransformation() override {} - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; - bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override; + NGRAPH_RTTI_DECLARATION; + SubtractTransformation(const Params& params); + bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override; }; } // namespace low_precision diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/subtract_multiply_to_multiply_add.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/subtract_multiply_to_multiply_add.hpp index 62bcd527663a6e..cee4f4f5d627e1 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/subtract_multiply_to_multiply_add.hpp 
+++ b/inference-engine/src/low_precision_transformations/include/low_precision/subtract_multiply_to_multiply_add.hpp @@ -12,12 +12,11 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API SubtractMultiplyToMultiplyAddTransformation : public LayerTransformation { +class LP_TRANSFORMATIONS_API SubtractMultiplyToMultiplyAddTransformation : public LayerTransformation { public: - SubtractMultiplyToMultiplyAddTransformation(const Params& params) : LayerTransformation(params) {} - ~SubtractMultiplyToMultiplyAddTransformation() override {} - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; - bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override; + NGRAPH_RTTI_DECLARATION; + SubtractMultiplyToMultiplyAddTransformation(const Params& params = Params()); + bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override; bool canBeTransformed(const TransformationContext& context, std::shared_ptr layer) const override; bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; }; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/transformation_context.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/transformation_context.hpp index 0419cac1256cc8..1aad5e55bd648e 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/transformation_context.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/transformation_context.hpp @@ -13,8 +13,9 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API TransformationContext { +class LP_TRANSFORMATIONS_API TransformationContext { public: + TransformationContext(); explicit TransformationContext(std::shared_ptr function); std::shared_ptr function; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/transformer.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/transformer.hpp deleted file mode 100644 index 9e096c85ce458a..00000000000000 --- a/inference-engine/src/low_precision_transformations/include/low_precision/transformer.hpp +++ /dev/null @@ -1,316 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include -#include -#include -#include -#include - -#include -#include - -#include "layer_transformation.hpp" -#include "iparams_manager.hpp" -#include "ilayer_transformations_manager.hpp" - -namespace ngraph { -namespace pass { -namespace low_precision { - -struct StandaloneCleanup { - std::string typeName; - std::string typeId; - LayerTransformationPtr transformation; -}; - -class TRANSFORMATIONS_API LowPrecisionTransformations { -public: - LowPrecisionTransformations() {} - LowPrecisionTransformations( - const std::map& branchSpecificTransformations, - const std::map& decompositionTransformations, - const std::map& transformations, - const std::map>>& cleanupTransformations, - const std::vector& standaloneCleanupTransformations); - - void setUpdatePrecisions(const bool updatePrecisions); - void setQuantizedTensorAlignmentOnActivations(const LayerTransformation::QuantizedTensorAlignment quantizedTensorAlignmentOnActivations); - void setQuantizedTensorAlignmentOnWeights(const LayerTransformation::QuantizedTensorAlignment quantizedTensorAlignmentOnWeights); - - /** - * Remove branch specific transformation. 
Transformation type and operation type are required. - * Operation type is used to find transformation by operation during precision definition. - */ - template - LowPrecisionTransformations& removeBranchSpecific() { - const std::string operationType = getType(); - const std::string transformationType = typeid(Transformation).name(); - - for (auto it = branchSpecificTransformations.begin(); it != branchSpecificTransformations.end(); ++it) { - const auto& tranformationPtr = *it->second; - if ((it->first == operationType) && (typeid(tranformationPtr).name() == transformationType)) { - branchSpecificTransformations.erase(it); - break; - } - } - return *this; - } - - /** - * Remove transformation. Transformation type and operation type are required. - * Operation type is used to find transformation by operation during precision definition. - */ - template - LowPrecisionTransformations& remove() { - const std::string operationType = getType(); - const std::string transformationType = typeid(Transformation).name(); - - for (auto it = transformations.begin(); it != transformations.end(); ++it) { - const auto& tranformationPtr = *it->second; - if ((it->first == operationType) && (typeid(tranformationPtr).name() == transformationType)) { - transformations.erase(it); - break; - } - } - return *this; - } - - /** - * Remove cleanup transformation. Transformation type and operation type are required. - * Operation type is used to find transformation by operation during precision definition. - */ - template - LowPrecisionTransformations& removeCleanup() { - const std::string operationType = getType(); - const std::string transformationType = typeid(Transformation).name(); - - const auto it = cleanupTransformations.find(operationType); - if (it != cleanupTransformations.end()) { - const auto it1 = std::find_if(it->second.begin(), it->second.end(), - [&](const std::pair& transformation) { - return transformation.first == transformationType; - }); - if (it1 != it->second.end()) { - it->second.erase(it1); - if (it->second.empty()) { - cleanupTransformations.erase(it); - } - } - } - return *this; - } - - /** - * Remove standalone cleanup transformation. Transformation type and operation type are required. - * Operation type is used to find transformation by operation during precision definition. - */ - template - LowPrecisionTransformations& removeStandaloneCleanup() { - const std::string operationType = getType(); - const std::string transformationType = typeid(Transformation).name(); - - for (auto it = standaloneCleanupTransformations.begin(); it != standaloneCleanupTransformations.end(); ++it) { - const auto& standaloneCleanup = *it; - if ((operationType == standaloneCleanup.typeName) && (transformationType == standaloneCleanup.typeId)) { - standaloneCleanupTransformations.erase(it); - break; - } - } - return *this; - } - - template - LowPrecisionTransformations& removeAll() { - removeBranchSpecific(); - remove(); - removeCleanup(); - removeStandaloneCleanup(); - - return *this; - } - - /** - * Add branch specific transformation. Transformation type and operation type are required. - * Operation type is used to find transformation by operation during precision definition. 
- */ - template - LowPrecisionTransformations& addBranchSpecific(const LayerTransformation::Params& params) { - const std::string typeName = getType(); - const auto it = branchSpecificTransformations.find(typeName); - if (it != branchSpecificTransformations.end()) { - branchSpecificTransformations.erase(it); - } - - branchSpecificTransformations.emplace(typeName, std::make_shared(params)); - return *this; - } - - /** - * Add decomposition transformation. Transformation type and operation type are required. - * Operation type is used to find transformation by operation during precision definition. - */ - template - LowPrecisionTransformations& addDecomposition(const LayerTransformation::Params& params) { - const std::string typeName = getType(); - const auto it = decompositionTransformations.find(typeName); - if (it != decompositionTransformations.end()) { - decompositionTransformations.erase(it); - } - - decompositionTransformations.emplace(typeName, std::make_shared(params)); - return *this; - } - - /** - * Add transformation. Transformation type and operation type are required. - * Operation type is used to find transformation by operation during precision definition. - */ - template - LowPrecisionTransformations& add(const LayerTransformation::Params& params) { - const std::string typeName = getType(); - const auto it = transformations.find(typeName); - if (it != transformations.end()) { - transformations.erase(it); - } - - transformations.emplace(typeName, std::make_shared(params)); - return *this; - } - - /** - * Add cleanup transformation. Transformation type and operation type are required. - * Operation type is used to find transformation by operation during precision definition. - */ - template - LowPrecisionTransformations& addCleanup(const LayerTransformation::Params& params) { - const std::string typeName = getType(); - const std::string typeId = typeid(Transformation).name(); - const auto it = cleanupTransformations.find(typeName); - if (it == cleanupTransformations.end()) { - cleanupTransformations.emplace(typeName, - std::vector>{ std::make_pair(typeId, std::make_shared(params)) }); - } else { - const auto it1 = std::find_if(it->second.begin(), it->second.end(), - [&](const std::pair& transformation) { - return transformation.first == typeName; - }); - if (it1 != it->second.end()) { - it->second.erase(it1); - } - it->second.emplace_back(std::make_pair(typeId, std::make_shared(params))); - } - return *this; - } - - /** - * Add cleanup transformation. Transformation type and operation type are required. - * Operation type is used to find transformation by operation during precision definition. 
- */ - template - LowPrecisionTransformations& addStandaloneCleanup(const LayerTransformation::Params& params) { - const std::string typeName = getType(); - const std::string typeId = typeid(Transformation).name(); - const auto it = std::find_if(standaloneCleanupTransformations.begin(), standaloneCleanupTransformations.end(), - [&](const StandaloneCleanup& transformation) { - return transformation.typeName == typeName && transformation.typeId == typeId; - }); - if (it == standaloneCleanupTransformations.end()) { - standaloneCleanupTransformations.emplace_back(StandaloneCleanup{ typeName, typeId, std::make_shared(params) }); - } else { - *it = { typeName, typeId, std::make_shared(params) }; - } - - return *this; - } - - template - static std::string getType() { - return Operation::get_type_info_static().name; - } - - static std::string getType(const Node& operation) { - return operation.get_type_name(); - } - - std::vector find(const std::string& transformationName) const; - - template - std::vector find() const { - const std::string transformationKey = getType(); - return find(transformationKey); - } - - void setParamsManager(IParamsManager* paramsManager) noexcept; - void setLayerTransformationsManager(ILayerTransformationsManager* layerTransformationsManager) noexcept; - - // Key is not a layer type, but just a name of transformation - // Layer type (or a pattern) is defined by transformation itself as an ngraph matcher - std::map branchSpecificTransformations; - std::map decompositionTransformations; - std::map transformations; - std::map>> cleanupTransformations; - std::vector standaloneCleanupTransformations; - -private: - static void setParamsManager(IParamsManager* paramsManager, std::map& transformations) noexcept; - static void setParamsManager( - IParamsManager* paramsManager, - std::map>>& transformations) noexcept; - static void setParamsManager(IParamsManager* paramsManager, std::vector& transformations) noexcept; - static void setLayerTransformationsManager( - ILayerTransformationsManager* layerTransformationsManager, - std::map& transformations) noexcept; - static void setLayerTransformationsManager( - ILayerTransformationsManager* layerTransformationsManager, - std::map>>& transformations) noexcept; - static void setLayerTransformationsManager( - ILayerTransformationsManager* layerTransformationsManager, - std::vector& transformations) noexcept; -}; - -/** - * @brief low precision transformation component. 
- */ -class TRANSFORMATIONS_API LowPrecisionTransformer : public IParamsManager, ILayerTransformationsManager { -public: - static LowPrecisionTransformations getAllTransformations(const LayerTransformation::Params& params = LayerTransformation::Params()); - - static bool isFunctionQuantized(const std::shared_ptr& function); - - LowPrecisionTransformer(); - LowPrecisionTransformer(const LowPrecisionTransformations& transformations); - void transform(std::shared_ptr network); - - // IParamsManager interface implementation - std::vector getPrecisionsOnActivations(const Node& op) const noexcept override; - - // ILayerTransformationsManager interface implementation - bool isQuantized(const std::shared_ptr& layer) const noexcept override; - bool isPrecisionPreserved(const std::shared_ptr& layer) const noexcept override; - -private: - LowPrecisionTransformations transformations; - - void registerAllMatchers( - std::map transformations, - GraphRewrite& pass, - TransformationContext& context); - - void registerAllMatchers( - std::map>> transformations, - GraphRewrite& pass, - TransformationContext& context); -}; - -class TRANSFORMATIONS_API TypeRelaxedReplacer : public GraphRewrite { -public: - TypeRelaxedReplacer(); -}; - -} // namespace low_precision -} // namespace pass -} // namespace ngraph diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/transparent_base_transformation.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/transparent_base_transformation.hpp index d915515b598197..05b0dbebc0191f 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/transparent_base_transformation.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/transparent_base_transformation.hpp @@ -12,11 +12,11 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API TransparentBaseTransformation : public LayerTransformation { +class LP_TRANSFORMATIONS_API TransparentBaseTransformation : public LayerTransformation { public: TransparentBaseTransformation(const Params& params) : LayerTransformation(params) {} ~TransparentBaseTransformation() override {}; - bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override; + bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override; bool canBeTransformed(const TransformationContext& context, std::shared_ptr layer) const override; }; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/transpose.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/transpose.hpp index 3b41f3d48b25a7..d22fcc8ed8cf36 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/transpose.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/transpose.hpp @@ -12,12 +12,11 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API TransposeTransformation : public LayerTransformation { +class LP_TRANSFORMATIONS_API TransposeTransformation : public LayerTransformation { public: - TransposeTransformation(const Params& params) : LayerTransformation(params) {} - ~TransposeTransformation() override {} - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; - bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override; + NGRAPH_RTTI_DECLARATION; + TransposeTransformation(const Params& 
params = Params()); + bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override; bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; bool canBeTransformed(const TransformationContext& context, std::shared_ptr op) const override; }; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/unsqueeze.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/unsqueeze.hpp index ea166c979120ab..580c09ad80bcce 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/unsqueeze.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/unsqueeze.hpp @@ -11,11 +11,11 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API UnsqueezeTransformation : public LayerTransformation { +class LP_TRANSFORMATIONS_API UnsqueezeTransformation : public LayerTransformation { public: - UnsqueezeTransformation(const Params& params); - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; - bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) const override; + NGRAPH_RTTI_DECLARATION; + UnsqueezeTransformation(const Params& params = Params()); + bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override; bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; bool canBeTransformed(const TransformationContext& context, std::shared_ptr layer) const override; }; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/update_shared_precision_preserved.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/update_shared_precision_preserved.hpp new file mode 100644 index 00000000000000..119ae13c412126 --- /dev/null +++ b/inference-engine/src/low_precision_transformations/include/low_precision/update_shared_precision_preserved.hpp @@ -0,0 +1,107 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include +#include + +#include "low_precision/network_helper.hpp" +#include "low_precision/lpt_itt.hpp" +#include "low_precision/lpt_visibility.hpp" + +namespace ngraph { +namespace pass { +namespace low_precision { + +template +class UpdateSharedPrecisionPreserved; + +} // namespace low_precision +} // namespace pass +} // namespace ngraph + +template +class ngraph::pass::low_precision::UpdateSharedPrecisionPreserved : public ngraph::pass::MatcherPass { +public: + UpdateSharedPrecisionPreserved() { + ngraph::graph_rewrite_callback callback = [&](pattern::Matcher& m) { + auto node = m.get_match_root(); + + const bool needToCheckExpectedAttributeType = !std::is_same::value; + if (!needToCheckExpectedAttributeType) { + // expected attribute is ignored, set attributes for node inputs except Result & FakeQuantize operations + if (is_type(node) || + is_type(node) || + transformation_callback(node)) { + return false; + } + } + + if (ngraph::pass::low_precision::NetworkHelper::isPrecisionPreserved(node) || is_type(node)) { + return false; + } + + { + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::LPT_LT, "UpdateSharedPrecisionPreserved"); + + // TODO: check if node can be quantized, if not, then doesn't update + for (auto input : node->inputs()) { + auto precisionsAttributeWrapper = getAttribute(input); + if (precisionsAttributeWrapper != nullptr) { + const auto precisionsAttribute = precisionsAttributeWrapper->get(); + 
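+                        // an empty precision list on the shared value means no low-precision
+                        // type is feasible for this input, so the match is rejected below
+                        // before any PrecisionPreserved attribute is updated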
assert(precisionsAttribute != nullptr); + if (precisionsAttribute->sharedValue->precisions.empty()) { + return false; + } + } + } + + for (auto input : node->inputs()) { + if (needToCheckExpectedAttributeType) { + if (getAttribute(input) == nullptr) { + return false; + } + } + auto parentAttribute = getSourceAttribute(input); + if (parentAttribute == nullptr) { + continue; + } + + parentAttribute->get()->sharedValue->value = true; + } + } + + return true; + }; + + auto matcher = std::make_shared(pattern::any_input(), "PropagateThroughPrecisionPreserved"); + this->register_matcher(matcher, callback); + } + +private: + Input getDequantizationInput(const Input& input) { + const auto dequantization = NetworkHelper::getDequantization(input.get_node()->shared_from_this(), input.get_index()); + if (!dequantization.empty() && + (is_type(dequantization.data.get_node())) && + is_type(dequantization.data.get_node()->get_input_node_ptr(0))) { + assert(dequantization.data.get_target_inputs().size() == 1ul); + return *dequantization.data.get_target_inputs().begin(); + } + return input; + } + + std::shared_ptr> getSourceAttribute(const Input& input) { + const auto dequantizationInput = getDequantizationInput(input); + const auto output = dequantizationInput.get_source_output(); + auto attribute = ngraph::pass::low_precision::getAttribute(output.get_node()->shared_from_this()); + if (attribute == nullptr) { + attribute = ngraph::pass::low_precision::getAttribute(output.get_node_shared_ptr()); + } + return attribute; + } +}; diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/variadic_split.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/variadic_split.hpp index e7cab0c527c10e..014b3775fe75b8 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/variadic_split.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/variadic_split.hpp @@ -13,10 +13,10 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API VariadicSplitTransformation : public SplitTransformation { +class LP_TRANSFORMATIONS_API VariadicSplitTransformation : public SplitTransformation { public: - VariadicSplitTransformation(const Params& params); - void registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const override; + NGRAPH_RTTI_DECLARATION; + VariadicSplitTransformation(const Params& params = Params()); }; } // namespace low_precision } // namespace pass diff --git a/inference-engine/src/low_precision_transformations/include/low_precision/weightable_layer_transformation.hpp b/inference-engine/src/low_precision_transformations/include/low_precision/weightable_layer_transformation.hpp index aeb0a6d9abd576..d2b5823fd3d16d 100644 --- a/inference-engine/src/low_precision_transformations/include/low_precision/weightable_layer_transformation.hpp +++ b/inference-engine/src/low_precision_transformations/include/low_precision/weightable_layer_transformation.hpp @@ -13,21 +13,30 @@ namespace ngraph { namespace pass { namespace low_precision { -class TRANSFORMATIONS_API WeightableLayerTransformation : public LayerTransformation{ +class LP_TRANSFORMATIONS_API WeightableLayerTransformation : public LayerTransformation{ public: WeightableLayerTransformation(const Params& params); bool canBeTransformed(const TransformationContext& context, std::shared_ptr layer) const override; bool canConvolutionBeTransformed(const TransformationContext& context, std::shared_ptr layer) const; 
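     // the instance-level isQuantized() below is superseded by the static
     // isQuantizedStatic() added further down, so quantization status can be
     // queried without constructing the transformation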
- bool isQuantized(std::shared_ptr layer, bool reshapeIsRequired) const noexcept; bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; + static bool checkPrecisionOnActivation( + const std::shared_ptr& node, + const std::vector& supportedPrecisionsOnActivations) { + return true; + } + + static bool isQuantizedStatic(const std::shared_ptr& layer, const bool reshapeIsRequired) noexcept; + protected: - void decomposeFakeQuantizeForWeightsPath(const std::shared_ptr& weightableLayer, size_t outChannelsShapeIndex = 0ul) const; + bool decomposeFakeQuantizeForWeightsPath(const std::shared_ptr& weightableLayer, size_t outChannelsShapeIndex = 0ul) const; static bool isGroup(const std::shared_ptr& node); static bool isDepthwise(const std::shared_ptr& node); - std::shared_ptr getFakeQuantizeOnWeights(const std::shared_ptr& node) const; - DataPrecision getDataPrecisionOnWeights(const std::shared_ptr& node) const; +public: + static std::shared_ptr getFakeQuantizeOnWeights(const std::shared_ptr& node); + static DataPrecision getDataPrecisionOnWeights(const std::shared_ptr& node); + static bool isAsymmetricOnWeights(const std::shared_ptr& node); }; } // namespace low_precision diff --git a/inference-engine/src/low_precision_transformations/src/add.cpp b/inference-engine/src/low_precision_transformations/src/add.cpp index 915e87d2f60803..4ecd8464370c94 100644 --- a/inference-engine/src/low_precision_transformations/src/add.cpp +++ b/inference-engine/src/low_precision_transformations/src/add.cpp @@ -10,6 +10,7 @@ #include #include +#include #include "ngraph_ops/type_relaxed.hpp" #include "low_precision/common/ie_lpt_exception.hpp" @@ -20,6 +21,8 @@ namespace ngraph { namespace pass { namespace low_precision { +NGRAPH_RTTI_DEFINITION(AddTransformation, "AddTransformation", 0); + std::shared_ptr replaceToSubtract(const std::shared_ptr& op) { // TODO: separate this part to standalone transformation: AddToSubtractTransformation // motivation: @@ -88,11 +91,22 @@ std::shared_ptr fuseWithSubtract(const std::shared_ptr& return newSubtract; } -void AddTransformation::registerMatcherIn(GraphRewrite &pass, TransformationContext &context) const { - addSingleNodePattern(pass, context); +AddTransformation::AddTransformation(const Params& params) : EltwiseBaseTransformation(params) { + auto matcher = ngraph::pattern::wrap_type(); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; + + auto m = std::make_shared(matcher, "AddTransformation"); + this->register_matcher(m, callback); } -bool AddTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const { +bool AddTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) { std::shared_ptr op = as_type_ptr(m.get_match_root()); if ((op == nullptr) || (!canBeTransformed(context, op))) { return false; diff --git a/inference-engine/src/low_precision_transformations/src/align_quantization_intervals.cpp b/inference-engine/src/low_precision_transformations/src/align_quantization_intervals.cpp new file mode 100644 index 00000000000000..728161d0207aa1 --- /dev/null +++ b/inference-engine/src/low_precision_transformations/src/align_quantization_intervals.cpp @@ -0,0 +1,25 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "low_precision/align_quantization_intervals.hpp" +#include +#include +#include 
"low_precision/create_attribute.hpp" +#include "low_precision/propagate_through_precision_preserved.hpp" +#include "low_precision/rt_info/intervals_alignment_attribute.hpp" + +using namespace ngraph; +using namespace ngraph::pass::low_precision; + +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::AlignQuantizationIntervals, "AlignQuantizationIntervals", 0); + +bool ngraph::pass::low_precision::AlignQuantizationIntervals::run_on_function(std::shared_ptr f) { + ngraph::pass::Manager manager; + manager.set_per_pass_validation(false); + std::shared_ptr intervalsAlignment = manager.register_pass(); + intervalsAlignment->add_matcher>(); + intervalsAlignment->add_matcher>(); + manager.run_passes(f); + return false; +} diff --git a/inference-engine/src/low_precision_transformations/src/align_quantization_parameters.cpp b/inference-engine/src/low_precision_transformations/src/align_quantization_parameters.cpp new file mode 100644 index 00000000000000..72d4ed1184c694 --- /dev/null +++ b/inference-engine/src/low_precision_transformations/src/align_quantization_parameters.cpp @@ -0,0 +1,27 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "low_precision/align_quantization_parameters.hpp" +#include +#include "low_precision/create_attribute.hpp" +#include "low_precision/propagate_through_precision_preserved.hpp" +#include "low_precision/rt_info/quantization_alignment_attribute.hpp" +#include "low_precision/rt_info/per_tensor_quantization_attribute.hpp" +#include "low_precision/update_shared_precision_preserved.hpp" + +using namespace ngraph; +using namespace ngraph::pass::low_precision; + +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::AlignQuantizationParameters, "AlignQuantizationParameters", 0); + +bool ngraph::pass::low_precision::AlignQuantizationParameters::run_on_function(std::shared_ptr f) { + ngraph::pass::Manager manager; + manager.set_per_pass_validation(false); + std::shared_ptr propagation = manager.register_pass(); + propagation->add_matcher>(); + propagation->add_matcher>(); + propagation->add_matcher>(); + manager.run_passes(f); + return false; +} diff --git a/inference-engine/src/low_precision_transformations/src/avg_pool.cpp b/inference-engine/src/low_precision_transformations/src/avg_pool.cpp index 3af973904e4be1..1fde22ec550f5e 100644 --- a/inference-engine/src/low_precision_transformations/src/avg_pool.cpp +++ b/inference-engine/src/low_precision_transformations/src/avg_pool.cpp @@ -7,39 +7,39 @@ #include #include #include +#include #include "low_precision/network_helper.hpp" +#include "low_precision/rt_info/precision_preserved_attribute.hpp" namespace ngraph { namespace pass { namespace low_precision { -AvgPoolTransformation::AvgPoolTransformation(const Params& params) : LayerTransformation(params) { -} +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::AvgPoolTransformation, "AvgPoolTransformation", 0); -void AvgPoolTransformation::registerMatcherIn(GraphRewrite &pass, TransformationContext &context) const { - addPattern( - pass, - context, - make_op_pattern({ make_op_label() })); +AvgPoolTransformation::AvgPoolTransformation(const Params& params) : LayerTransformation(params) { + auto matcher = pattern::wrap_type({ pattern::wrap_type() }); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; + + auto m = std::make_shared(matcher, "AvgPoolTransformation"); + 
diff --git a/inference-engine/src/low_precision_transformations/src/avg_pool.cpp b/inference-engine/src/low_precision_transformations/src/avg_pool.cpp
index 3af973904e4be1..1fde22ec550f5e 100644
--- a/inference-engine/src/low_precision_transformations/src/avg_pool.cpp
+++ b/inference-engine/src/low_precision_transformations/src/avg_pool.cpp
@@ -7,39 +7,39 @@
 #include
 #include
 #include
+#include <ngraph/pattern/op/wrap_type.hpp>

 #include "low_precision/network_helper.hpp"
+#include "low_precision/rt_info/precision_preserved_attribute.hpp"

 namespace ngraph {
 namespace pass {
 namespace low_precision {

-AvgPoolTransformation::AvgPoolTransformation(const Params& params) : LayerTransformation(params) {
-}
+NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::AvgPoolTransformation, "AvgPoolTransformation", 0);

-void AvgPoolTransformation::registerMatcherIn(GraphRewrite &pass, TransformationContext &context) const {
-    addPattern(
-        pass,
-        context,
-        make_op_pattern<opset1::AvgPool>({ make_op_label<opset1::Multiply>() }));
+AvgPoolTransformation::AvgPoolTransformation(const Params& params) : LayerTransformation(params) {
+    auto matcher = pattern::wrap_type<opset1::AvgPool>({ pattern::wrap_type<opset1::Multiply>() });
+
+    ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
+        auto op = m.get_match_root();
+        if (transformation_callback(op)) {
+            return false;
+        }
+        return transform(*context, m);
+    };
+
+    auto m = std::make_shared<ngraph::pattern::Matcher>(matcher, "AvgPoolTransformation");
+    this->register_matcher(m, callback);
 }

-bool AvgPoolTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const {
+bool AvgPoolTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) {
     if (!canBeTransformed(context, m.get_match_root())) {
         return false;
     }

     const std::shared_ptr<Node> pooling = NetworkHelper::separateInStandaloneBranch(m.get_match_root());
-
-    const std::vector<std::shared_ptr<ngraph::Node>> children = getChildrenRecursivelyExceptPrecisionPreserved(pooling);
-
-    bool updatePrecision;
-    if ((children.size() == 1ul) && (!this->layerTransformationsManager->isQuantized(children[0]))) {
-        updatePrecision = false;
-    } else {
-        updatePrecision = NetworkHelper::notAllChildrensAreFQ(children);
-    }
-
+    const bool updatePrecision = isPrecisionPreserved(pooling);
     moveDequantizationAfter(context, pooling, NetworkHelper::getDequantization(pooling), updatePrecision);
     return true;
 }
@@ -55,8 +55,7 @@ bool AvgPoolTransformation::canBeTransformed(const TransformationContext& contex
 }

 bool AvgPoolTransformation::isPrecisionPreserved(std::shared_ptr<Node> layer) const noexcept {
-    const std::vector<std::shared_ptr<ngraph::Node>> children = getChildrenRecursivelyExceptPrecisionPreserved(layer);
-    return NetworkHelper::notAllChildrensAreFQ(children);
+    return NetworkHelper::isPrecisionPreserved(layer);
 }

 } // namespace low_precision
diff --git a/inference-engine/src/low_precision_transformations/src/base_matcher_pass.cpp b/inference-engine/src/low_precision_transformations/src/base_matcher_pass.cpp
new file mode 100644
index 00000000000000..2514559179edb1
--- /dev/null
+++ b/inference-engine/src/low_precision_transformations/src/base_matcher_pass.cpp
@@ -0,0 +1,13 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "low_precision/base_matcher_pass.hpp"
+#include
+#include "low_precision/rt_info/attribute_parameters.hpp"
+
+using namespace ngraph;
+using namespace ngraph::pass::low_precision;
+
+ngraph::pass::low_precision::BaseMatcherPass::BaseMatcherPass(const AttributeParameters& params) : params(params) {
+}
diff --git a/inference-engine/src/low_precision_transformations/src/clamp.cpp b/inference-engine/src/low_precision_transformations/src/clamp.cpp
index 56cee1d88a497b..45c4cd5986c1a1 100644
--- a/inference-engine/src/low_precision_transformations/src/clamp.cpp
+++ b/inference-engine/src/low_precision_transformations/src/clamp.cpp
@@ -6,21 +6,32 @@
 #include
 #include
 #include
+
+#include <ngraph/pattern/op/wrap_type.hpp>
 #include "low_precision/network_helper.hpp"

 namespace ngraph {
 namespace pass {
 namespace low_precision {

-ClampTransformation::ClampTransformation(const Params& params) : LayerTransformation(params) {}
+NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::ClampTransformation, "ClampTransformation", 0);
+
+ClampTransformation::ClampTransformation(const Params& params) : LayerTransformation(params) {
+    auto matcher = pattern::wrap_type<opset1::Clamp>({ pattern::wrap_type<opset1::Multiply>() });
+
+    ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
+        auto op = m.get_match_root();
+        if (transformation_callback(op)) {
+            return false;
+        }
+        return transform(*context, m);
+    };

-void ClampTransformation::registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const {
-    addPattern(pass,
-               context,
-               make_op_pattern<opset1::Clamp>({ make_op_label<opset1::Multiply>() }));
+    auto m = std::make_shared<ngraph::pattern::Matcher>(matcher, "ClampTransformation");
+    this->register_matcher(m, callback);
 }

-bool ClampTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher& m) const {
+bool ClampTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher& m) {
     auto subWithTheSameValues = [](std::shared_ptr<opset1::Subtract> sub) {
         if (sub == nullptr) {
             return false;
diff --git a/inference-engine/src/low_precision_transformations/src/common/operation_precision_restriction.cpp b/inference-engine/src/low_precision_transformations/src/common/operation_precision_restriction.cpp
new file mode 100644
index 00000000000000..0ec085d7245129
--- /dev/null
+++ b/inference-engine/src/low_precision_transformations/src/common/operation_precision_restriction.cpp
@@ -0,0 +1,19 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "low_precision/common/operation_precision_restriction.hpp"
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include "low_precision/network_helper.hpp"
+#include "low_precision/rt_info/precisions_attribute.hpp"
+
+using namespace ngraph;
+
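Note: operation_precision_restriction.cpp only anchors the includes — the class itself stays header-only. For context, a hedged sketch of how per-port precision restrictions appear to be assembled elsewhere in this PR (names and the exact create<T> signature are inferred, treat them as assumptions):

    // Hypothetical usage sketch: restrict Convolution to u8 activations / i8 weights.
    #include <vector>
    #include <ngraph/opsets/opset1.hpp>
    #include "low_precision/common/operation_precision_restriction.hpp"

    using namespace ngraph::pass::low_precision;

    std::vector<OperationPrecisionRestriction> makeDefaultRestrictions() {
        return std::vector<OperationPrecisionRestriction>({
            OperationPrecisionRestriction::create<ngraph::opset1::Convolution>({
                {0, {ngraph::element::u8}},   // port 0: activations
                {1, {ngraph::element::i8}},   // port 1: weights
            }),
        });
    }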
diff --git a/inference-engine/src/low_precision_transformations/src/concat.cpp b/inference-engine/src/low_precision_transformations/src/concat.cpp
index 622550794b29ab..0863dcb3f09763 100644
--- a/inference-engine/src/low_precision_transformations/src/concat.cpp
+++ b/inference-engine/src/low_precision_transformations/src/concat.cpp
@@ -11,11 +11,11 @@
 #include
 #include
+#include <ngraph/pattern/op/wrap_type.hpp>
 #include

 #include "low_precision/common/fake_quantize_dequantization.hpp"
 #include "low_precision/common/ie_lpt_exception.hpp"
-#include "low_precision/common/subgraph.hpp"
 #include "low_precision/common/dequantization_op.hpp"
 #include "low_precision/network_helper.hpp"

@@ -23,218 +23,155 @@ namespace ngraph {
 namespace pass {
 namespace low_precision {

-void ConcatTransformation::registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const {
-    addSingleNodePattern<opset1::Concat>(pass, context);
-}
+NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::ConcatTransformation, "ConcatTransformation", 0);

-bool ConcatTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const {
-    std::shared_ptr<ngraph::opset1::Concat> concat = ngraph::as_type_ptr<ngraph::opset1::Concat>(m.get_match_root());
-    if (!canBeTransformed(context, concat)) {
-        return false;
-    }
+ConcatTransformation::ConcatTransformation(const Params& params) : LayerTransformation(params) {
+    auto matcher = ngraph::pattern::wrap_type<opset1::Concat>();

-    ngraph::pass::low_precision::Subgraph subgraph(layerTransformationsManager);
-    std::unordered_set<std::string> handledLayers;
-    if (!subgraph.fillSubgraphForConcat(concat, handledLayers)) {
-        return false;
-    }
+    ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
+        auto op = m.get_match_root();
+        if (transformation_callback(op)) {
+            return false;
+        }

-    if (subgraph.quantizationLayers.empty() || isHandled(context, subgraph.quantizationLayers)) {
-        return false;
-    }
+        return transform(*context, m);
+    };

-    // Concat operations precision is defined:
-    // 1. consumers after Concat
-    // 2. FakeQuantize precisions without zero point
-    ngraph::Node& quantizationLayer = *subgraph.quantizationLayers[0];
-    std::shared_ptr<ngraph::opset1::FakeQuantize> fq = ngraph::as_type_ptr<ngraph::opset1::FakeQuantize>(quantizationLayer.shared_from_this());
-    if (!NetworkHelper::isQuantizeSupported(fq)) {
-        return false;
-    }
-    DataPrecision dataPrecision = getDataPrecision(fq, QuantizationDetails::getDetails(fq), false);
-    if (dataPrecision.precision == ngraph::element::undefined) {
+    auto m = std::make_shared<ngraph::pattern::Matcher>(matcher, "ConcatTransformation");
+    this->register_matcher(m, callback);
+}
+
+bool ConcatTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) {
+    std::shared_ptr<ngraph::opset1::Concat> concat = ngraph::as_type_ptr<ngraph::opset1::Concat>(m.get_match_root());
+    if (!canBeTransformed(context, concat)) {
         return false;
     }

-    std::vector<element::Type> concatChildrenPrecisions = precisionsOnActivations;
-
-    for (size_t i = 0; i < subgraph.quantizationLayers.size(); ++i) {
-        fq = ngraph::as_type_ptr<opset1::FakeQuantize>(subgraph.quantizationLayers[i]);
-        if (fq == nullptr) {
+    std::vector<FakeQuantizeDequantization> layerDequantizations;
+    layerDequantizations.reserve(concat->get_input_size());
+    for (size_t parentIndex = 0ul; parentIndex < concat->get_input_size(); parentIndex++) {
+        FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(concat, parentIndex);
+        if (dequantization.empty()) {
             return false;
         }
+        layerDequantizations.push_back(dequantization);
+    }

-        if (!NetworkHelper::isQuantizeSupported(fq)) {
-            return false;
+    bool allDequantizationShiftAreZero = true;
+    bool allDequantizationMultiplyAreZero = true;
+    for (const auto& dequantization : layerDequantizations) {
+        if (dequantization.subtract != nullptr) {
+            allDequantizationShiftAreZero = false;
         }

-        const QuantizationDetails& quantizationDetails = QuantizationDetails::getDetails(fq);
-
-        // per tensor scale is supported only
-        if (quantizationDetails.inputHighValues.size() != 1ul) {
-            return false;
+        if (dequantization.multiply != nullptr) {
+            allDequantizationMultiplyAreZero = false;
         }

-        // define concatenation operation consumers precisions
-        std::vector<element::Type> fqChildrenPrecisions = precisionsOnActivations;
-        fillAvailablePrecisions(subgraph.quantizationLayers[i], fqChildrenPrecisions);
-        concatChildrenPrecisions = NetworkHelper::precisionIntersection(concatChildrenPrecisions, fqChildrenPrecisions);
-        if (concatChildrenPrecisions.empty()) {
-            return false;
+        if (!allDequantizationShiftAreZero && !allDequantizationMultiplyAreZero) {
+            break;
         }
+    }

-        // define FakeQuantize precisions without zero point
-        const DataPrecision dataPrecision2 = getDataPrecision(subgraph.quantizationLayers[i]->shared_from_this(), quantizationDetails, false);
-        if (dataPrecision2.precision == ngraph::element::undefined) {
-            return false;
-        }
+    auto broadcastElementWiseConst = [](
+        // FakeQuantize constant shape must be broadcastable to the shape on data.
+        std::shared_ptr<ngraph::opset1::Constant> operation,
+        const ngraph::Shape targetShape) -> std::shared_ptr<Node> {
+            auto targetShapeConst = std::make_shared<ngraph::opset1::Constant>(
+                element::i64, ngraph::Shape{ targetShape.size() },
+                targetShape);

-        if (dataPrecision.precision != dataPrecision2.precision) {
-            dataPrecision = dataPrecision.precision.is_signed() ? dataPrecision : dataPrecision2;
-        }
-    }
+            auto broadcast = ngraph::pass::low_precision::fold<ngraph::opset1::Broadcast>(
+                operation,
+                targetShapeConst,
+                ngraph::op::AutoBroadcastType::NUMPY);

-    if (std::find(concatChildrenPrecisions.begin(), concatChildrenPrecisions.end(), dataPrecision.precision) == concatChildrenPrecisions.end()) {
-        dataPrecision = DataPrecision(concatChildrenPrecisions[0]);
-    }
+            return broadcast;
+    };

-    std::vector<QuantizationDetails> quantizationLayersDetails;
-    for (size_t i = 0; i < subgraph.quantizationLayers.size(); ++i) {
-        std::shared_ptr<opset1::FakeQuantize> fakeQuantize = as_type_ptr<opset1::FakeQuantize>(subgraph.quantizationLayers[i]);
-        auto newFakeQuantize = NetworkHelper::fuseConvert(fakeQuantize);
-        if (newFakeQuantize == nullptr) {
-            subgraph.quantizationLayers[i] = fakeQuantize;
-            quantizationLayersDetails.push_back(QuantizationDetails::getDetails(fakeQuantize));
-            continue;
+    bool someDqInLowPrecision = std::any_of(
+        layerDequantizations.begin(),
+        layerDequantizations.end(),
+        [](const FakeQuantizeDequantization& value) { return value.isLowPrecision(); });
+
+    bool someDqInFpPrecision = std::any_of(
+        layerDequantizations.begin(),
+        layerDequantizations.end(),
+        [](const FakeQuantizeDequantization& value) { return !value.isLowPrecision(); });
+
+    bool DqWithDifferentPrecision = someDqInLowPrecision && someDqInFpPrecision;
+
+    OutputVector dataNodes;
+    NodeVector convertNodes;
+    NodeVector subtractNodes;
+    NodeVector multiplyNodes;
+    for (size_t i = 0; i < layerDequantizations.size(); ++i) {
+        const auto& dequantization = layerDequantizations[i];
+
+        if (DqWithDifferentPrecision && dequantization.isLowPrecision()) {
+            dataNodes.push_back(dequantization.convert);
+        } else {
+            dataNodes.push_back(dequantization.data);
         }

-        fakeQuantize = newFakeQuantize;
-        newFakeQuantize = NetworkHelper::composeFakeQuantize(fakeQuantize);
-        if (newFakeQuantize == nullptr) {
-            subgraph.quantizationLayers[i] = fakeQuantize;
-            quantizationLayersDetails.push_back(QuantizationDetails::getDetails(fakeQuantize));
-            continue;
+        if (dequantization.convert != nullptr) {
+            convertNodes.push_back(dequantization.convert);
         }

-        fakeQuantize = newFakeQuantize;
-        subgraph.quantizationLayers[i] = fakeQuantize;
-        quantizationLayersDetails.push_back(QuantizationDetails::getDetails(fakeQuantize));
-    }
-
-    FakeQuantizeDequantization dequantization;
+        Shape targetShape(concat->get_input_partial_shape(i).rank().get_length(), 1ul);
+        targetShape[1] = concat->get_input_partial_shape(i)[1].get_length();

-    if ((quantizationLayersDetails[0].inputHighValues.size() == 1)) {
-        float outputLowValue = quantizationLayersDetails[0].outputLowValues[0];
-        float outputHighValue = quantizationLayersDetails[0].outputHighValues[0];
-
-        for (size_t index = 0lu; index < subgraph.quantizationLayers.size(); index++) {
-            const QuantizationDetails& quantizationDetails = quantizationLayersDetails[index];
-            if (outputLowValue > quantizationDetails.outputLowValues[0]) {
-                outputLowValue = quantizationDetails.outputLowValues[0];
-            }
-            if (outputHighValue < quantizationDetails.outputHighValues[0]) {
-                outputHighValue = quantizationDetails.outputHighValues[0];
-            }
+        if (!allDequantizationShiftAreZero) {
+            subtractNodes.push_back(dequantization.subtract == nullptr ?
+                std::make_shared<ngraph::opset1::Constant>(deqPrecision, targetShape, std::vector<float>({ 0.f })) :
+                broadcastElementWiseConst(dequantization.subtractConstant, targetShape));
         }

-        if ((outputLowValue == 0.f) && (outputHighValue == 0.f)) {
-            return false;
+        if (!allDequantizationMultiplyAreZero) {
+            multiplyNodes.push_back(dequantization.multiply == nullptr ?
+                std::make_shared<ngraph::opset1::Constant>(deqPrecision, targetShape, std::vector<float>({ 1.0f })) :
+                broadcastElementWiseConst(dequantization.multiplyConstant, targetShape));
         }
+    }

-        const float maxOutputInterval = outputHighValue - outputLowValue;
-        if (quantizedTensorAlignmentOnActivations == QuantizedTensorAlignment::UpdateLevel) {
-            const size_t minLevels = getMinQuantizationLevels(
-                dataPrecision,
-                maxOutputInterval,
-                quantizationLayersDetails,
-                outputLowValue,
-                outputHighValue);
-            if (minLevels < this->minQuantizationLevels) {
-                return false;
-            }
-        }
+    const auto newConcat = concat->clone_with_new_inputs(dataNodes);

-        // FQ -> SUB_quantization -> MUL_quantization -[INT8]-> SUB_dequantization -> MUL_dequantization ->
-        const float quantizationMul = (dataPrecision.max - dataPrecision.min) / maxOutputInterval;
-        const float dequantizationMul = maxOutputInterval / (dataPrecision.max - dataPrecision.min);
-
-        // FQ outputLowValue = dataPrecision.min * dequantizationMul - quantizationSub
-        const float quantizationSub = outputLowValue - dataPrecision.min * dequantizationMul;
-        const float dequantizationSub = std::round(-quantizationSub * quantizationMul);
-
-        // 1. get data for dequantization. Dequantization data will be used several times later.
-        dequantization = ngraph::pass::low_precision::NetworkHelper::makeDequantization(
-            dequantizationMul,
-            dequantizationSub,
-            subgraph.quantizationLayers[0]->get_output_element_type(0),
-            subgraph.quantizationLayers[0]->get_output_partial_shape(0),
-            updatePrecisions ? dataPrecision.precision : subgraph.quantizationLayers[0]->get_output_element_type(0),
-            deqPrecision);
-
-        for (size_t index = 0; index < subgraph.quantizationLayers.size(); index++) {
-            std::shared_ptr<opset1::FakeQuantize> fakeQuantizeLayer = as_type_ptr<opset1::FakeQuantize>(
-                subgraph.quantizationLayers[index]->shared_from_this());
-
-            const QuantizationDetails& quantizationDetails = quantizationLayersDetails[index];
-
-            switch (quantizedTensorAlignmentOnActivations) {
-                case QuantizedTensorAlignment::None: {
-                    THROW_TRANSFORMATION_EXCEPTION << "not implemented: " << quantizedTensorAlignmentOnActivations;
-                }
-                case QuantizedTensorAlignment::UpdateLevel: {
-                    const float updatedOutputLowValue = (quantizationDetails.outputLowValues[0] - quantizationSub) * quantizationMul;
-                    const float updatedOutputHighValue = (quantizationDetails.outputHighValues[0] - quantizationSub) * quantizationMul;
-
-                    // 2. update FakeQuantize - one time action
-                    std::shared_ptr<opset1::FakeQuantize> newFakeQuantizeLayer = ngraph::pass::low_precision::NetworkHelper::updateFakeQuantize(
-                        fakeQuantizeLayer,
-                        updatePrecisions ? dataPrecision.precision : fakeQuantizeLayer->get_output_element_type(0),
-                        roundf(updatedOutputLowValue),
-                        roundf(updatedOutputHighValue));
-
-                    const size_t levels = static_cast<size_t>(fabs(roundf(updatedOutputHighValue) - roundf(updatedOutputLowValue)) + 1.0);
-                    newFakeQuantizeLayer->set_levels(levels);
-
-                    subgraph.quantizationLayers[index] = newFakeQuantizeLayer;
-                    subgraph.layers[fakeQuantizeLayer->get_friendly_name()] = newFakeQuantizeLayer;
-                    break;
-                }
-                default: {
-                    THROW_TRANSFORMATION_EXCEPTION << "unexpected value " << quantizedTensorAlignmentOnActivations;
-                }
-            }
-        }
-    } else {
-        return false;
+    std::shared_ptr<ngraph::Node> lastDequantization = newConcat;
+    if (!convertNodes.empty()) {
+        const auto convert = convertNodes[0]->clone_with_new_inputs({ newConcat });
+
+        NetworkHelper::copyInfo({ concat, convert }, convert);
+        lastDequantization = convert;
     }

-    auto dequantizationValuesCallback = [&](
-        std::shared_ptr<ngraph::Node> layer,
-        std::shared_ptr<ngraph::Node> child,
-        const std::string originalLayerName,
-        std::vector<FakeQuantizeDequantization>& dequantizationsToConcatenate) {
-        dequantizationsToConcatenate.push_back(dequantization);
-    };
+    // concatenation axis is 1
+    if (!subtractNodes.empty()) {
+        const auto subtract = std::make_shared<DequantizationSubtract>(
+            lastDequantization,
+            NetworkHelper::toScalarIfPossible(subtractNodes.size() == 1ul ?
+                subtractNodes[0] :
+                ngraph::pass::low_precision::fold<ngraph::opset1::Concat>(subtractNodes, 1)));

-    addDequantizationLayers(context, subgraph, dequantizationValuesCallback);
-
-    if (updatePrecisions) {
-        for (const auto it : subgraph.layers) {
-            const std::shared_ptr<ngraph::Node>& node = it.second;
-            if (std::dynamic_pointer_cast<ngraph::op::TypeRelaxedBase>(node) != nullptr) {
-                ngraph::pass::low_precision::NetworkHelper::setOutDataPrecisionForTypeRelaxed(node->shared_from_this(), dataPrecision.precision);
-            } else {
-                // set precision to explicitly to have updated precision during transformation
-                for (size_t i = 0; i < node->get_output_size(); ++i) {
-                    node->set_output_type(i, dataPrecision.precision, node->get_output_partial_shape(i));
-                }
-            }
-        }
+        NetworkHelper::copyInfo({ concat, subtract }, subtract);
+        lastDequantization = subtract;
     }

-    for (const std::shared_ptr<ngraph::Node>& quantizationLayer : subgraph.quantizationLayers) {
-        context.quantizedFakeQuantizeNames.insert(quantizationLayer->get_friendly_name());
+    if (!multiplyNodes.empty()) {
+        const auto multiply = std::make_shared<op::TypeRelaxed<DequantizationMultiply>>(
+            DequantizationMultiply(
+                lastDequantization,
+                NetworkHelper::toScalarIfPossible(multiplyNodes.size() == 1ul ?
+                    multiplyNodes[0] :
+                    ngraph::pass::low_precision::fold<ngraph::opset1::Concat>(multiplyNodes, 1))),
+            layerDequantizations[0].multiply->get_output_element_type(0));
+
+        NetworkHelper::copyInfo({ concat, multiply }, multiply);
+        lastDequantization = multiply;
     }
+
+    replace_node(concat, lastDequantization);
+    NetworkHelper::copyInfo(concat, newConcat);
+    updateOutput(context, lastDequantization, newConcat);
     return true;
 }
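Note: the removed UpdateLevel branch derived one shared scale/shift pair from the union of all branch intervals. A quick numeric check of those (now deleted) formulas, under the assumption of a u8 target range [0; 255] and a union interval [-1.28; 2.55]:

    // Worked example of the removed shared-interval arithmetic, for reference only.
    #include <cmath>
    #include <cstdio>

    int main() {
        const float min = 0.f, max = 255.f;               // u8 data precision
        const float outputLow = -1.28f, outputHigh = 2.55f;
        const float interval = outputHigh - outputLow;     // 3.83
        const float quantizationMul = (max - min) / interval;        // ~66.58
        const float dequantizationMul = interval / (max - min);      // ~0.0150
        const float quantizationSub = outputLow - min * dequantizationMul;            // -1.28
        const float dequantizationSub = std::round(-quantizationSub * quantizationMul); // ~85
        std::printf("%f %f %f %f\n", quantizationMul, dequantizationMul, quantizationSub, dequantizationSub);
        return 0;
    }

The replacement code above sidesteps this shared math entirely: it concatenates the per-branch dequantization constants (broadcast to each input's channel count) and hangs a single Subtract/Multiply pair behind the new Concat.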
@@ -251,6 +188,8 @@ bool ConcatTransformation::canBeTransformed(const TransformationContext& context
     const auto axis = concat->get_axis();
     const auto outPShape = concat->get_output_partial_shape(0);
     const size_t normalizedAxis = ngraph::normalize_axis(concat->get_friendly_name(), axis, outPShape.rank());
+
+    // TODO: LPT: to support current flow: #58269
     if (normalizedAxis != 1ul) {
         return false;
     }
@@ -259,6 +198,27 @@ bool ConcatTransformation::canBeTransformed(const TransformationContext& context
         return false;
     }

+    const bool perTensorQuantizationIsRequired = normalizedAxis != 1ul;
+
+    element::Type precision;
+    for (size_t i = 0ul; i < concat->get_input_size(); i++) {
+        const FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(concat, i);
+        if (dequantization.empty() || (updatePrecisions && !dequantization.isLowPrecision())) {
+            return false;
+        }
+
+        if (precision == element::undefined) {
+            precision = dequantization.data.get_element_type();
+        } else if (precision != dequantization.data.get_element_type()) {
+            return false;
+        }
+
+        if (perTensorQuantizationIsRequired &&
+            (((dequantization.subtractConstant != nullptr) && !NetworkHelper::isScalarLike(dequantization.subtractConstant)) ||
+            ((dequantization.multiplyConstant != nullptr) && !NetworkHelper::isScalarLike(dequantization.multiplyConstant)))) {
+            return false;
+        }
+    }
     return true;
 }

@@ -338,115 +298,6 @@ std::shared_ptr<Node> ConcatTransformation::concatenateDeqNodes(NodeVector& nodes
     return nodes.size() == 1ul ? nodes[0] : fold<ngraph::opset1::Concat>(nodes, 1);
 }

-void ConcatTransformation::addDequantizationLayers(
-    TransformationContext& context,
-    ngraph::pass::low_precision::Subgraph& subgraph,
-    std::function<void(
-        std::shared_ptr<ngraph::Node> layer,
-        std::shared_ptr<ngraph::Node> child,
-        const std::string originalLayerName,
-        std::vector<FakeQuantizeDequantization>& dequantizationsToConcatenate)> getLayerDequantizationCallback) const {
-    std::unordered_map<std::string, ngraph::Node*> outputs;
-    for (size_t i = 0; i < context.function->get_output_size(); ++i) {
-        ngraph::Node* node = context.function->get_output_op(i).get();
-        if (node->get_input_size() != 1ul) {
-            THROW_IE_LPT_EXCEPTION(*node) << "unexpected inputs count for result node";
-        }
-
-        outputs.emplace(node->get_input_node_shared_ptr(0)->get_friendly_name(), node);
-    }
-
-    std::unordered_map<std::string, std::shared_ptr<ngraph::Node>> notHandledSubgraphLayers = subgraph.layers;
-    while (notHandledSubgraphLayers.size() != 0ul) {
-        const auto layerIt = notHandledSubgraphLayers.begin();
-        std::shared_ptr<ngraph::Node> layer = layerIt->second;
-        notHandledSubgraphLayers.erase(layerIt);
-
-        std::vector<FakeQuantizeDequantization> layerDequantizations;
-
-        for (size_t i = 0; i < layer->get_output_size(); ++i) {
-            const auto childInputs = layer->get_output_target_inputs(i);
-            for (const auto childInput : childInputs) {
-                ngraph::Node& child = *childInput.get_node();
-
-                if (subgraph.layers.find(child.get_friendly_name()) == subgraph.layers.end()) {
-                    std::shared_ptr<ngraph::Node> source = layer;
-                    const std::shared_ptr<ngraph::Node> destination = child.shared_from_this();
-
-                    if (layerDequantizations.size() == 0ul) {
-                        // fill layerDequantizations collection
-                        getLayerDequantizationCallback(source, destination, source->get_friendly_name(), layerDequantizations);
-                    }
-
-                    {
-                        NodeVector convertNodes;
-                        NodeVector subtractNodes;
-                        NodeVector multiplyNodes;
-
-                        // forming nodes for concatenation
-                        fillDequantizationNodes(layerDequantizations, layer, convertNodes, subtractNodes, multiplyNodes);
-
-                        // TODO: the second place (first is FQ decomposition) where dequantization operations are inserted
-                        if (!convertNodes.empty()) {
-                            const size_t sourceOutputIdx = NetworkHelper::getChildInputIndex(source, destination);
-                            std::shared_ptr<ngraph::Node> convert =
-                                convertNodes[0]->clone_with_new_inputs({ destination->get_input_source_output(sourceOutputIdx) });
-
-                            insert_new_node_between(source, destination, convert);
-                            ngraph::copy_runtime_info({ layer, convert }, convert);
-                            source = convert;
-                        }
-
-                        // concatenation axis is 1
-                        if (!subtractNodes.empty()) {
-                            const size_t sourceOutputIdx = NetworkHelper::getChildInputIndex(source, destination);
-                            std::shared_ptr<ngraph::Node> subtract = std::make_shared<DequantizationSubtract>(
-                                destination->get_input_source_output(sourceOutputIdx),
-                                NetworkHelper::toScalarIfPossible(concatenateDeqNodes(subtractNodes)));
-
-                            insert_new_node_between(source, destination, subtract);
-                            ngraph::copy_runtime_info({ layer, subtract }, subtract);
-                            source = subtract;
-                        }
-
-                        if (!multiplyNodes.empty()) {
-                            const size_t sourceOutputIdx = NetworkHelper::getChildInputIndex(source, destination);
-                            std::shared_ptr<ngraph::Node> multiply = std::make_shared<op::TypeRelaxed<DequantizationMultiply>>(
-                                DequantizationMultiply(
-                                    destination->get_input_source_output(sourceOutputIdx),
-                                    NetworkHelper::toScalarIfPossible(concatenateDeqNodes(multiplyNodes))),
-                                layerDequantizations[0].multiply->get_output_element_type(0));
-
-                            insert_new_node_between(source, destination, multiply);
-                            ngraph::copy_runtime_info({ layer, multiply }, multiply);
-                            source = multiply;
-                        }
-                    }
-
-                    // first input is used
-                    const ngraph::element::Type precision = layerDequantizations[0].data.get_element_type();
-                    layer->set_output_type(0, precision, layer->get_output_partial_shape(0));
-
-                    const auto it = outputs.find(layer->get_friendly_name());
-                    if (it != outputs.end() && is_type<ngraph::opset1::Result>(child.shared_from_this())) {
-                        const std::string originalName = layer->get_friendly_name();
-                        const std::string newName = layer->get_friendly_name() + LayerTransformation::originalLayerPostfix;
-                        layer->set_friendly_name(newName);
-
-                        // Split & VariadicSplit have other naming rules
-                        if (is_type<opset1::Split>(layer) || is_type<opset1::VariadicSplit>(layer)) {
-                            source->set_friendly_name(originalName + "." + std::to_string(i));
-                        } else {
-                            source->set_friendly_name(originalName);
-                        }
-                        subgraph.layers[layer->get_friendly_name()] = layer;
-                    }
-                }
-            }
-        }
-    }
-}
-
 bool ConcatTransformation::isHandled(const TransformationContext& context, const std::vector<std::shared_ptr<ngraph::Node>>& quantizationOperations) {
     for (const std::shared_ptr<ngraph::Node>& quantizationLayer : quantizationOperations) {
         if (context.quantizedFakeQuantizeNames.find(quantizationLayer->get_friendly_name()) != context.quantizedFakeQuantizeNames.end()) {
@@ -457,32 +308,6 @@ bool ConcatTransformation::isHandled(const TransformationContext& context, const
     return false;
 }

-size_t ConcatTransformation::getMinQuantizationLevels(
-    const DataPrecision& dataPrecision,
-    const float maxOutputInterval,
-    const std::vector<QuantizationDetails>& quantizationLayersDetails,
-    const float outputLowValue,
-    const float outputHighValue) const {
-    size_t minLevels = std::numeric_limits<size_t>::max();
-    for (const QuantizationDetails quantizationDetails : quantizationLayersDetails) {
-        // if there is negative part then calculation is based on `outputLowValue` if not then on `outputHighValue` only
-        const float updatedOutputLowValue = outputLowValue != 0.f ?
-            (quantizationDetails.outputLowValues[0] / outputLowValue) * dataPrecision.min :
-            (quantizationDetails.outputLowValues[0] / outputHighValue) * dataPrecision.max;
-
-        // if there is positive part then calculation is based on `outputHighValue` if not then on `outputLowValue` only
-        const float updatedOutputHighValue = outputHighValue != 0.f ?
-            (quantizationDetails.outputHighValues[0] / outputHighValue) * dataPrecision.max :
-            (quantizationDetails.outputHighValues[0] / outputLowValue) * dataPrecision.min;
-
-        const size_t levels = static_cast<size_t>(fabs(roundf(updatedOutputHighValue) - roundf(updatedOutputLowValue)) + 1.0);
-        if (minLevels > levels) {
-            minLevels = levels;
-        }
-    }
-    return minLevels;
-}
-
 } // namespace low_precision
 } // namespace pass
 } // namespace ngraph
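Note: a numeric instance of the deleted getMinQuantizationLevels() arithmetic makes the intent obvious — a branch covering only part of the shared interval keeps only a fraction of the quantization levels (assumed values, i8 target):

    // Numeric check of the removed minLevels computation.
    #include <cmath>
    #include <cstddef>
    #include <cstdio>

    int main() {
        const float min = -128.f, max = 127.f;           // i8 data precision
        const float outputLow = -1.f, outputHigh = 2.f;  // union interval
        const float fqLow = -0.5f, fqHigh = 1.f;         // one branch interval
        const float updatedLow = (fqLow / outputLow) * min;     // -64
        const float updatedHigh = (fqHigh / outputHigh) * max;  // 63.5
        const size_t levels = static_cast<size_t>(std::fabs(std::round(updatedHigh) - std::round(updatedLow)) + 1.0);
        std::printf("levels = %zu\n", levels);           // 129 of 256 remain
        return 0;
    }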
diff --git a/inference-engine/src/low_precision_transformations/src/concat_multi_channels.cpp b/inference-engine/src/low_precision_transformations/src/concat_multi_channels.cpp
deleted file mode 100644
index cd1f01a54f68cc..00000000000000
--- a/inference-engine/src/low_precision_transformations/src/concat_multi_channels.cpp
+++ /dev/null
@@ -1,334 +0,0 @@
-// Copyright (C) 2018-2021 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "low_precision/concat_multi_channels.hpp"
-
-#include
-#include
-#include
-#include
-#include
-
-#include
-#include
-
-#include "low_precision/common/fake_quantize_dequantization.hpp"
-#include "low_precision/common/dequantization_op.hpp"
-#include "low_precision/common/ie_lpt_exception.hpp"
-#include "low_precision/common/subgraph.hpp"
-#include "low_precision/network_helper.hpp"
-
-namespace ngraph {
-namespace pass {
-namespace low_precision {
-
-bool ConcatMultiChannelsTransformation::isMultiChannel(const std::vector<std::shared_ptr<ngraph::opset1::Concat>>& concatLayers) const noexcept {
-    for (const std::shared_ptr<ngraph::opset1::Concat>& concat : concatLayers) {
-        const std::vector<std::shared_ptr<ngraph::Node>> children = getChildrenRecursivelyExceptPrecisionPreserved(concat);
-        for (const std::shared_ptr<ngraph::Node>& child : children) {
-            if ((is_type<ngraph::opset1::Convolution>(child.get()) ||
-                is_type<ngraph::opset1::ConvolutionBackpropData>(child.get())) &&
-                this->layerTransformationsManager->isQuantized(child)) {
-                return false;
-            }
-        }
-    }
-    return true;
-}
-
-void ConcatMultiChannelsTransformation::registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const {
-    addSingleNodePattern<opset1::Concat>(pass, context);
-}
-
-bool ConcatMultiChannelsTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const {
-    std::shared_ptr<ngraph::opset1::Concat> concat = ngraph::as_type_ptr<ngraph::opset1::Concat>(m.get_match_root());
-    if (!canBeTransformed(context, concat)) {
-        return false;
-    }
-
-    ngraph::pass::low_precision::Subgraph subgraph(layerTransformationsManager);
-    std::unordered_set<std::string> handledLayers;
-    if (!subgraph.fillSubgraphForConcat(concat, handledLayers)) {
-        return false;
-    }
-
-    if (subgraph.quantizationLayers.empty() || isHandled(context, subgraph.quantizationLayers)) {
-        return false;
-    }
-
-    if (!isMultiChannel(subgraph.concatLayers)) {
-        ConcatTransformation::transform(context, m);
-        return false;
-    }
-
-    DataPrecision dataPrecision;
-    {
-        std::vector<element::Type> concatChildrenPrecisions = precisionsOnActivations;
-        for (auto quantizationLayer : subgraph.quantizationLayers) {
-            std::shared_ptr<ngraph::opset1::FakeQuantize> fq = ngraph::as_type_ptr<ngraph::opset1::FakeQuantize>(quantizationLayer->shared_from_this());
-            if (!NetworkHelper::isQuantizeSupported(fq)) {
-                return false;
-            }
-
-            // define concatenation operation consumers precisions
-            std::vector<element::Type> fqChildrenPrecisions = precisionsOnActivations;
-            fillAvailablePrecisions(quantizationLayer, fqChildrenPrecisions);
-            concatChildrenPrecisions = NetworkHelper::precisionIntersection(concatChildrenPrecisions, fqChildrenPrecisions);
-            if (concatChildrenPrecisions.empty()) {
-                return false;
-            }
-
-            // define FakeQuantize precisions without zero point
-            const DataPrecision tmp = getDataPrecision(fq, QuantizationDetails::getDetails(fq), false);
-            if (dataPrecision.precision == ngraph::element::undefined) {
-                dataPrecision = tmp;
-                continue;
-            }
-
-            if ((tmp.precision != dataPrecision.precision) && (tmp.precision == ngraph::element::u8)) {
-                dataPrecision = tmp;
-            }
-        }
-
-        if (std::find(concatChildrenPrecisions.begin(), concatChildrenPrecisions.end(), dataPrecision.precision) == concatChildrenPrecisions.end()) {
-            dataPrecision = DataPrecision(concatChildrenPrecisions[0]);
-        }
-    }
-
-    for (size_t i = 0; i < subgraph.quantizationLayers.size(); ++i) {
-        const std::shared_ptr<ngraph::opset1::FakeQuantize> fq = ngraph::as_type_ptr<ngraph::opset1::FakeQuantize>(subgraph.quantizationLayers[i]);
-        if (fq == nullptr) {
-            return false;
-        }
-
-        if (!NetworkHelper::isQuantizeSupported(fq)) {
-            return false;
-        }
-    }
-
-    std::unordered_map<std::string, FakeQuantizeDequantization> dequantizations;
-
-    for (size_t i = 0; i < subgraph.quantizationLayers.size(); ++i) {
-        const std::shared_ptr<ngraph::Node>& fakeQuantizeLayer = subgraph.quantizationLayers[i];
-
-        std::shared_ptr<ngraph::opset1::FakeQuantize> fq = ngraph::as_type_ptr<ngraph::opset1::FakeQuantize>(fakeQuantizeLayer->shared_from_this());
-        assert(fq);
-
-        auto newFakeQuantize = NetworkHelper::fuseConvert(fq);
-        if (newFakeQuantize != nullptr) {
-            fq = newFakeQuantize;
-        }
-
-        newFakeQuantize = NetworkHelper::composeFakeQuantize(fq);
-        if (newFakeQuantize != nullptr) {
-            fq = newFakeQuantize;
-        }
-
-        const DataPrecision currentDataPrecision = getDataPrecision(fq, QuantizationDetails::getDetails(fq), false);
-        const QuantizationDetails quantizationDetails = QuantizationDetails::getDetails(fq);
-
-        // 1. get data for dequantization. Dequantization data will be used several times later.
-        const FakeQuantizeDequantization fakeQuantizeDequantization = ngraph::pass::low_precision::NetworkHelper::createDequantizationFromFakeQuantize(
-            fq,
-            dataPrecision.precision,
-            dataPrecision.min,
-            dataPrecision.max,
-            dataPrecision.precision == currentDataPrecision.precision ? currentDataPrecision.hasZeroPoint : true,
-            updatePrecisions,
-            deqPrecision);
-        dequantizations[fakeQuantizeLayer->get_friendly_name()] = fakeQuantizeDequantization;
-
-        // 2. update FakeQuantize - one time action
-        const std::shared_ptr<opset1::FakeQuantize> newFakeQuantizeLayer = ngraph::pass::low_precision::NetworkHelper::updateFakeQuantize(
-            fq,
-            updatePrecisions ? dataPrecision.precision : fakeQuantizeLayer->get_output_element_type(0),
-            roundf(dataPrecision.min),
-            roundf(dataPrecision.max));
-
-        subgraph.quantizationLayers[i] = newFakeQuantizeLayer;
-        subgraph.layers[fakeQuantizeLayer->get_friendly_name()] = newFakeQuantizeLayer;
-    }
-
-    auto dequantizationValuesCallback = [&](
-        std::shared_ptr<ngraph::Node> layer,
-        std::shared_ptr<ngraph::Node> child,
-        const std::string originalLayerName,
-        std::vector<FakeQuantizeDequantization>& dequantizationsToConcatenate) {
-        if (layer->get_friendly_name() != originalLayerName) {
-            const auto update = [](
-                const std::string& originalLayerName,
-                const std::string& newLayerName,
-                std::unordered_map<std::string, FakeQuantizeDequantization>& dequantizationLayers) {
-                auto it = dequantizationLayers.find(originalLayerName);
-                if (it != dequantizationLayers.end()) {
-                    dequantizationLayers.emplace(newLayerName, it->second);
-                    dequantizationLayers.erase(it);
-                }
-            };
-            update(originalLayerName, layer->get_friendly_name(), dequantizations);
-        }
-
-        fillDequantization(
-            layer,
-            dequantizations,
-            dequantizationsToConcatenate);
-
-        if (!is_type<ngraph::opset1::Concat>(layer)) {
-            // for intermediate layers we should get Dq operations to be inserted between layer and child
-            assert(dequantizationsToConcatenate.size() == 1ul);
-            const size_t sourceOutputIdx = NetworkHelper::getParentOutputIndex(layer, child);
-            if (layer->get_input_partial_shape(0)[1] != layer->get_output_partial_shape(sourceOutputIdx)[1]) {
-                dequantizationsToConcatenate[0] = getFoldedDequantization(layer, dequantizationsToConcatenate[0], sourceOutputIdx);
-            }
-        }
-    };
-
-    addDequantizationLayers(context, subgraph, dequantizationValuesCallback);
-
-    if (updatePrecisions) {
-        for (const auto it : subgraph.layers) {
-            const std::shared_ptr<ngraph::Node> node = it.second;
-            if (std::dynamic_pointer_cast<ngraph::op::TypeRelaxedBase>(node)) {
-                ngraph::pass::low_precision::NetworkHelper::setOutDataPrecisionForTypeRelaxed(node->shared_from_this(), dataPrecision.precision);
-            } else {
-                // set precision to explicitly to have updated precision during transformation
-                for (size_t i = 0; i < node->get_output_size(); ++i) {
-                    node->set_output_type(i, dataPrecision.precision, node->get_output_partial_shape(i));
-                }
-            }
-        }
-    }
-
-    for (const std::shared_ptr<ngraph::Node>& quantizationLayer : subgraph.quantizationLayers) {
-        context.quantizedFakeQuantizeNames.insert(quantizationLayer->get_friendly_name());
-    }
-    return true;
-}
-
-bool ConcatMultiChannelsTransformation::isPrecisionPreserved(std::shared_ptr<Node>) const noexcept {
-    return true;
-}
-
-void ConcatMultiChannelsTransformation::fillDequantization(
-    const std::shared_ptr<ngraph::Node> layer,
-    const std::unordered_map<std::string, FakeQuantizeDequantization>& dequantizationByFakeQuantize,
-    std::vector<FakeQuantizeDequantization>& dequantization) const {
-    const auto fillDqByFakeQuantize = [&](const std::shared_ptr<ngraph::Node>& fq) {
-        const auto it = dequantizationByFakeQuantize.find(fq->get_friendly_name());
-        if (it == dequantizationByFakeQuantize.end()) {
-            THROW_IE_LPT_EXCEPTION(*fq) << "dequantization scale values are not found";
-        }
-
-        const FakeQuantizeDequantization& fakeQuantizeDequantization = it->second;
-        dequantization.push_back(fakeQuantizeDequantization);
-    };
-
-    if (is_type<ngraph::opset1::FakeQuantize>(layer)) {
-        fillDqByFakeQuantize(layer);
-    } else {
-        for (size_t i = 0; i < layer->get_input_size(); ++i) {
-            std::shared_ptr<ngraph::Node> parent = layer->get_input_node_shared_ptr(i);
-            if (as_type_ptr<ngraph::opset1::Constant>(parent)) {
-                continue;
-            }
-
-            const auto fakeQuantize = ngraph::as_type_ptr<ngraph::opset1::FakeQuantize>(parent);
-            if (fakeQuantize) {
-                fillDqByFakeQuantize(fakeQuantize);
-            } else {
-                const auto concat = ngraph::as_type_ptr<ngraph::opset1::Concat>(parent);
-                if (concat) {
-                    std::vector<FakeQuantizeDequantization> dequantizationToConcatenate;
-                    fillDequantization(concat, dequantizationByFakeQuantize, dequantizationToConcatenate);
-
-                    // add concatenated dequantization operations to dequantization collection
-                    dequantization.push_back(getConcatenatedDequantization(concat, dequantizationToConcatenate));
-                } else {
-                    const size_t sourceOutputIdx = NetworkHelper::getParentOutputIndex(parent, layer);
-                    if (parent->get_input_partial_shape(0)[1] != parent->get_output_partial_shape(sourceOutputIdx)[1]) {
-                        std::vector<FakeQuantizeDequantization> dequantizationToPropagate;
-                        fillDequantization(parent, dequantizationByFakeQuantize, dequantizationToPropagate);
-
-                        // add folded dequantization operations to dequantization colection
-                        dequantization.push_back(getFoldedDequantization(parent, dequantizationToPropagate[0], sourceOutputIdx));
-                    } else {
-                        fillDequantization(parent, dequantizationByFakeQuantize, dequantization);
-                    }
-                }
-            }
-        }
-    }
-}
-
-FakeQuantizeDequantization ConcatMultiChannelsTransformation::getConcatenatedDequantization(
-    const std::shared_ptr<ngraph::opset1::Concat> concat,
-    const std::vector<FakeQuantizeDequantization>& dequantization) const {
-    NodeVector convertNodes;
-    NodeVector subtractNodes;
-    NodeVector multiplyNodes;
-
-    // forming nodes for concatenation
-    fillDequantizationNodes(dequantization, concat, convertNodes, subtractNodes, multiplyNodes);
-
-    std::shared_ptr<Node> parent = concat;
-    std::shared_ptr<DequantizationConvert> convert;
-    if (!convertNodes.empty()) {
-        convert = as_type_ptr<DequantizationConvert>(dequantization[0].convert->clone_with_new_inputs({ parent }));
-        parent = convert;
-    }
-
-    std::shared_ptr<DequantizationSubtract> subtract;
-    std::shared_ptr<ngraph::opset1::Constant> subConst;
-    if (!subtractNodes.empty()) {
-        subConst = as_type_ptr<ngraph::opset1::Constant>(concatenateDeqNodes(subtractNodes));
-        subtract = std::make_shared<DequantizationSubtract>(parent, subConst);
-        parent = subtract;
-    }
-
-    std::shared_ptr<DequantizationMultiply> multiply;
-    std::shared_ptr<ngraph::opset1::Constant> mulConst;
-    if (!multiplyNodes.empty()) {
-        mulConst = as_type_ptr<ngraph::opset1::Constant>(concatenateDeqNodes(multiplyNodes));
-        multiply = std::make_shared<DequantizationMultiply>(parent, mulConst);
-    }
-
-    return FakeQuantizeDequantization(concat, convert, subtract, nullptr, subConst, multiply, mulConst);
-}
-
-FakeQuantizeDequantization ConcatMultiChannelsTransformation::getFoldedDequantization(
-    const std::shared_ptr<ngraph::Node> operation,
-    const FakeQuantizeDequantization& dequantization,
-    const size_t sourceOutputIdx) {
-    OutputVector inputs = operation->input_values();
-    OutputVector outputs(operation->get_output_size());
-    Output<Node> data = operation->output(sourceOutputIdx);
-
-    std::shared_ptr<Node> parent = operation;
-    std::shared_ptr<DequantizationConvert> convert;
-    if (dequantization.convert) {
-        convert = as_type_ptr<DequantizationConvert>(dequantization.convert->clone_with_new_inputs({ data }));
-        parent = convert;
-    }
-
-    std::shared_ptr<DequantizationSubtract> subtract;
-    std::shared_ptr<ngraph::opset1::Constant> subConst;
-    if (dequantization.subtract) {
-        subConst = NetworkHelper::foldDequantizationConstant(dequantization.subtractConstant, operation, sourceOutputIdx);
-        subtract = std::make_shared<DequantizationSubtract>(parent, subConst);
-        parent = subtract;
-    }
-
-    std::shared_ptr<DequantizationMultiply> multiply;
-    std::shared_ptr<ngraph::opset1::Constant> mulConst;
-    if (dequantization.multiply) {
-        mulConst = NetworkHelper::foldDequantizationConstant(dequantization.multiplyConstant, operation, sourceOutputIdx);
-        multiply = std::make_shared<DequantizationMultiply>(parent, mulConst);
-    }
-
-    return FakeQuantizeDequantization(data, convert, subtract, nullptr, subConst, multiply, mulConst);
-}
-
-} // namespace low_precision
-} // namespace pass
-} // namespace ngraph
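Note: nearly every function in the deleted file (and in the new concat.cpp) manipulates the same four-piece dequantization chain. A hedged structural sketch of what FakeQuantizeDequantization appears to hold, with field names taken from the usage visible in this diff:

    // Illustrative only: the dequantization chain LPT tracks is
    //   data -> Convert(int8 -> fp32) -> Subtract(zero point) -> Multiply(scale)
    // and FakeQuantizeDequantization is essentially a view over those pieces.
    #include <memory>
    #include <ngraph/opsets/opset1.hpp>

    struct FakeQuantizeDequantizationSketch {
        ngraph::Output<ngraph::Node> data;                        // quantized tensor
        std::shared_ptr<ngraph::opset1::Convert> convert;         // optional
        std::shared_ptr<ngraph::opset1::Subtract> subtract;       // optional zero point
        std::shared_ptr<ngraph::opset1::Constant> subtractConstant;
        std::shared_ptr<ngraph::opset1::Multiply> multiply;       // optional scale
        std::shared_ptr<ngraph::opset1::Constant> multiplyConstant;

        bool empty() const {
            return convert == nullptr && subtract == nullptr && multiply == nullptr;
        }
    };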
diff --git a/inference-engine/src/low_precision_transformations/src/convert.cpp b/inference-engine/src/low_precision_transformations/src/convert.cpp
index 19bcce50e8c8a6..e96fc4820c77e3 100644
--- a/inference-engine/src/low_precision_transformations/src/convert.cpp
+++ b/inference-engine/src/low_precision_transformations/src/convert.cpp
@@ -11,6 +11,7 @@
 #include
 #include
+#include <ngraph/pattern/op/wrap_type.hpp>

 #include "low_precision/common/ie_lpt_exception.hpp"
 #include "low_precision/network_helper.hpp"
@@ -18,11 +19,24 @@ namespace ngraph {
 namespace pass {
 namespace low_precision {

-void ConvertTransformation::registerMatcherIn(GraphRewrite &pass, TransformationContext &context) const {
-    addSingleNodePattern<opset1::Convert>(pass, context);
+NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::ConvertTransformation, "ConvertTransformation", 0);
+
+ConvertTransformation::ConvertTransformation(const Params& params) : LayerTransformation(params) {
+    auto matcher = pattern::wrap_type<opset1::Convert>();
+
+    ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
+        auto op = m.get_match_root();
+        if (transformation_callback(op)) {
+            return false;
+        }
+        return transform(*context, m);
+    };
+
+    auto m = std::make_shared<ngraph::pattern::Matcher>(matcher, "ConvertTransformation");
+    this->register_matcher(m, callback);
 }

-bool ConvertTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const {
+bool ConvertTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) {
     std::shared_ptr<opset1::Convert> convert = as_type_ptr<opset1::Convert>(m.get_match_root());
     if (!convert) {
         return false;
diff --git a/inference-engine/src/low_precision_transformations/src/convolution.cpp b/inference-engine/src/low_precision_transformations/src/convolution.cpp
index 1dc4c42b476f34..889315678e9704 100644
--- a/inference-engine/src/low_precision_transformations/src/convolution.cpp
+++ b/inference-engine/src/low_precision_transformations/src/convolution.cpp
@@ -10,6 +10,8 @@
 #include
 #include
+#include <ngraph/pattern/op/wrap_type.hpp>
+#include <ngraph/pattern/op/or.hpp>

 #include "low_precision/network_helper.hpp"
 #include "low_precision/common/dequantization_op.hpp"
@@ -17,28 +19,39 @@ namespace ngraph {
 namespace pass {
 namespace low_precision {

-ConvolutionTransformation::ConvolutionTransformation(const Params& params) : WeightableLayerTransformation(params) {
-}
+NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::ConvolutionTransformation, "ConvolutionTransformation", 0);

-void ConvolutionTransformation::registerMatcherIn(GraphRewrite &pass, TransformationContext &context) const {
-    addPattern(
-        pass,
-        context,
-        make_op_pattern<opset1::Convolution>({ make_op_label<opset1::Multiply>(), make_op_label<opset1::Multiply>() }));
+ConvolutionTransformation::ConvolutionTransformation(const Params& params) : WeightableLayerTransformation(params) {
+    auto matcher = ngraph::pattern::wrap_type<opset1::Convolution>({
+        ngraph::pattern::wrap_type<opset1::Multiply>(),
+        std::make_shared<pattern::op::Or>(OutputVector {
+            pattern::wrap_type<opset1::Multiply>(),
+            pattern::wrap_type<opset1::FakeQuantize>()
+        })
+    });
+
+
+    ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
+        auto op = m.get_match_root();
+        if (transformation_callback(op)) {
+            return false;
+        }
+        return transform(*context, m);
+    };

-    addPattern(
-        pass,
-        context,
-        make_op_pattern<opset1::Convolution>({ make_op_label<opset1::Multiply>(), make_op_label<opset1::FakeQuantize>() }));
+    auto m = std::make_shared<ngraph::pattern::Matcher>(matcher, "ConvolutionTransformation");
+    this->register_matcher(m, callback);
 }

-bool ConvolutionTransformation::isQuantized(std::shared_ptr<Node> layer) const noexcept {
-    return WeightableLayerTransformation::isQuantized(layer, false);
+bool ConvolutionTransformation::isQuantized(const std::shared_ptr<const Node>& layer) const noexcept {
+    return ConvolutionTransformation::isQuantizedStatic(layer);
 }

+bool ConvolutionTransformation::isQuantizedStatic(const std::shared_ptr<const Node>& layer) noexcept {
+    return WeightableLayerTransformation::isQuantizedStatic(layer, false);
+}

-
-bool ConvolutionTransformation::transform(TransformationContext &context, ngraph::pattern::Matcher &m) const {
+bool ConvolutionTransformation::transform(TransformationContext &context, ngraph::pattern::Matcher &m) {
     auto convolution = m.get_match_root();

     if (!canConvolutionBeTransformed(context, convolution)) {
@@ -150,7 +163,7 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
                 reducedConstant->cast_vector<float>()[0]);
         }

-        const auto copyNode = convolution->copy_with_new_inputs({ dequantization.multiply->input_value(0), convolution->input_value(1) });
+        const auto copyNode = convolution->clone_with_new_inputs({ dequantization.multiply->input_value(0), convolution->input_value(1) });
         auto conv = as_type_ptr<opset1::Convolution>(copyNode);
         std::shared_ptr<Node> relaxedNewConvolution;
         if (conv) {
@@ -164,6 +177,7 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
                 std::vector<element::Type>{deqPrecision, deqPrecision},
                 std::vector<element::Type>{deqPrecision});
         }
+        NetworkHelper::copyInfo(convolution, relaxedNewConvolution);

         std::shared_ptr<ngraph::opset1::Multiply> newMultiplyAfter = std::make_shared<op::TypeRelaxed<DequantizationMultiply>>(
             std::vector<element::Type>{ deqPrecision, deqPrecision },
@@ -179,12 +193,18 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
                     convolution->get_input_node_ptr(0)->get_input_source_output(0),
                     convolution->input_value(1)});
             replace_node(convolution, newConvolution);
+            NetworkHelper::copyInfo(convolution, newConvolution);
             convolution = newConvolution;
         }
     }

     {
-        decomposeFakeQuantizeForWeightsPath(convolution);
+        const bool decomposed = decomposeFakeQuantizeForWeightsPath(convolution);
+        assert((updatePrecisions && decomposed) || (!updatePrecisions));
+        if (!updatePrecisions && !decomposed) {
+            // TODO: LPT: issue #58685
+            return false;
+        }

         std::shared_ptr<opset1::Reshape> reshapeFromWeights = as_type_ptr<opset1::Reshape>(convolution->input_value(1).get_node_shared_ptr());

@@ -218,13 +238,16 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
                         reshapeFromWeights->input_value(1) }));
             }

+            auto newConvolution = convolution->clone_with_new_inputs({
+                convolution->input_value(0),
+                reshapeFromWeights != nullptr ?
+                    reshapeFromWeights :
+                    multiplyFromWeights->input_value(0)
+            });
+            NetworkHelper::copyInfo(convolution, newConvolution);
+
             auto newMultiplyAfter = std::make_shared<DequantizationMultiply>(
-                convolution->copy_with_new_inputs({
-                    convolution->input_value(0),
-                    reshapeFromWeights != nullptr ?
-                        reshapeFromWeights :
-                        multiplyFromWeights->input_value(0)
-                }),
+                newConvolution,
                 foldConvert(
                     fold_reshape<opset1::Reshape>(
                         multiplyFromWeights->input_value(1),
@@ -270,6 +293,7 @@ bool ConvolutionTransformation::transform(TransformationContext &context, ngraph
                     convolution->get_input_node_ptr(1)->input_value(0) :
                     childNode->copy_with_new_inputs({convertFromWeights->input_value(0), childNode->input_value(1)})});
             replace_node(convolution, newConvolution);
+            NetworkHelper::copyInfo(convolution, newConvolution);
             convolution = newConvolution;
         }
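Note: decomposeFakeQuantizeForWeightsPath reduces, on the weights branch, a FakeQuantize over a constant to a quantized constant plus a dequantization Multiply that the code above then hoists behind the convolution. A scalar model of that folding, under the assumption of a symmetric output range (illustrative helper, not the library API):

    // Minimal scalar model of folding FQ-on-constant-weights into an int8
    // constant plus one dequantization scale.
    #include <cmath>
    #include <cstdint>
    #include <vector>

    std::vector<int8_t> foldFqOnWeights(const std::vector<float>& weights,
                                        float outputLow, float outputHigh,
                                        std::size_t levels, float& dequantScale) {
        // one fp32 step per quantization level; Multiply(scale) restores range
        dequantScale = (outputHigh - outputLow) / static_cast<float>(levels - 1);
        std::vector<int8_t> quantized(weights.size());
        for (std::size_t i = 0; i < weights.size(); ++i) {
            const float clamped = std::fmin(std::fmax(weights[i], outputLow), outputHigh);
            quantized[i] = static_cast<int8_t>(std::lround(clamped / dequantScale));
        }
        return quantized;
    }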
&context, ngraph::pattern::Matcher &m) { auto convolutionBackpropData = m.get_match_root(); if (!canBeTransformed(context, convolutionBackpropData)) { @@ -63,7 +72,15 @@ bool ConvolutionBackpropDataTransformation::transform(TransformationContext &con NetworkHelper::getDequantization(reshapeFromWeights); if (dequantization.empty()) { const auto fqOnWeights = getFakeQuantizeOnWeights(convolutionBackpropData); - std::shared_ptr resultConstant = NetworkHelper::fold_fake_quantize(fqOnWeights); + auto constantShape = fqOnWeights->input(1).get_partial_shape(); + if (constantShape.is_dynamic() || constantShape.rank().is_dynamic()) { + return false; + } + + std::shared_ptr resultConstant = NetworkHelper::fold_fake_quantize( + fqOnWeights, + false, + (constantShape.rank().get_length() < 2) || constantShape[1] != 1ul ? 1ul : 0ul); if (reshapeFromWeights != nullptr) { resultConstant = fold_reshape( resultConstant, @@ -198,18 +215,11 @@ bool ConvolutionBackpropDataTransformation::transform(TransformationContext &con rt["DISABLED_CONSTANT_FOLDING"] = std::make_shared>(""); } + return true; } bool ConvolutionBackpropDataTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr op) const { - if (deconvolutionSpecificChannelsRatio) { - size_t inputChannels = op->get_input_shape(0)[1]; - size_t outputChannels = op->get_output_shape(0)[1]; - if (inputChannels % 4 != 0 || outputChannels % 16 != 0) { - return false; - } - } - return canConvolutionBeTransformed(context, op); } diff --git a/inference-engine/src/low_precision_transformations/src/create_precisions_dependent_attribute.cpp b/inference-engine/src/low_precision_transformations/src/create_precisions_dependent_attribute.cpp new file mode 100644 index 00000000000000..7ddd060b06dc6d --- /dev/null +++ b/inference-engine/src/low_precision_transformations/src/create_precisions_dependent_attribute.cpp @@ -0,0 +1,22 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "low_precision/create_precisions_dependent_attribute.hpp" + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include "low_precision/rt_info/precisions_attribute.hpp" +#include "low_precision/rt_info/precision_preserved_attribute.hpp" +#include "low_precision/network_helper.hpp" + +using namespace ngraph; +using namespace ngraph::pass::low_precision; diff --git a/inference-engine/src/low_precision_transformations/src/depth_to_space.cpp b/inference-engine/src/low_precision_transformations/src/depth_to_space.cpp index c004d0ca59f92a..09d3b6fac17e33 100644 --- a/inference-engine/src/low_precision_transformations/src/depth_to_space.cpp +++ b/inference-engine/src/low_precision_transformations/src/depth_to_space.cpp @@ -4,25 +4,32 @@ #include "low_precision/depth_to_space.hpp" -#include #include -#include -#include - +#include #include "low_precision/network_helper.hpp" using namespace ngraph; using namespace ngraph::pass; using namespace ngraph::pass::low_precision; -void DepthToSpaceTransformation::registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const { - addPattern( - pass, - context, - make_op_pattern({ make_op_label() })); +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::DepthToSpaceTransformation, "DepthToSpaceTransformation", 0); + +DepthToSpaceTransformation::DepthToSpaceTransformation(const Params& params) : TransparentBaseTransformation(params) { + auto matcher = pattern::wrap_type({ pattern::wrap_type() }); + + ngraph::graph_rewrite_callback 
callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; + + auto m = std::make_shared(matcher, "DepthToSpaceTransformation"); + this->register_matcher(m, callback); } -bool DepthToSpaceTransformation::transform(TransformationContext &context, ngraph::pattern::Matcher &m) const { +bool DepthToSpaceTransformation::transform(TransformationContext &context, ngraph::pattern::Matcher &m) { std::shared_ptr depthToSpace = m.get_match_root(); if (!canBeTransformed(context, depthToSpace)) { return false; diff --git a/inference-engine/src/low_precision_transformations/src/fake_quantize.cpp b/inference-engine/src/low_precision_transformations/src/fake_quantize.cpp index 90aeb5aabe8bc2..93e6aa813c1cbb 100644 --- a/inference-engine/src/low_precision_transformations/src/fake_quantize.cpp +++ b/inference-engine/src/low_precision_transformations/src/fake_quantize.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include "low_precision/network_helper.hpp" @@ -14,11 +15,25 @@ namespace ngraph { namespace pass { namespace low_precision { -void FakeQuantizeTransformation::registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const { - addSingleNodePattern(pass, context); +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::FakeQuantizeTransformation, "FakeQuantizeTransformation", 0); + +FakeQuantizeTransformation::FakeQuantizeTransformation(const Params& params) : LayerTransformation(params) { + auto matcher = pattern::wrap_type(); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + + return transform(*context, m); + }; + + auto m = std::make_shared(matcher, "FakeQuantizeTransformation"); + this->register_matcher(m, callback); } -bool FakeQuantizeTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const { +bool FakeQuantizeTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) { std::shared_ptr layer = std::dynamic_pointer_cast(m.get_match_root()); if (!QuantizationDetails::outputLayoutIsSupported(layer)) { return false; @@ -28,13 +43,14 @@ bool FakeQuantizeTransformation::transform(TransformationContext& context, ngrap return false; } + bool wasHandled = false; std::shared_ptr fakeQuantize = layer; do { - layer = fakeQuantize; - fakeQuantize = fuseElementwise(context, fakeQuantize); + fakeQuantize = fuseElementwise(context, this, fakeQuantize); + wasHandled = wasHandled || (fakeQuantize != nullptr); } while (fakeQuantize != nullptr); - return true; + return wasHandled; } namespace fq { @@ -110,6 +126,7 @@ bool FakeQuantizeTransformation::checkElementwise(const std::shared_ptr& e std::shared_ptr FakeQuantizeTransformation::fuseElementwise( TransformationContext& context, + MatcherPass* matcherPass, const std::shared_ptr& fakeQuantize) const { const std::shared_ptr eltwise = fakeQuantize->get_input_node_shared_ptr(0); @@ -172,6 +189,7 @@ std::shared_ptr FakeQuantizeTransformation::fuseElementwis const auto data = fq::getData(eltwise); const size_t outputIdx = NetworkHelper::getParentOutputIndex(data, eltwise); + std::shared_ptr newFakeQuantize = as_type_ptr(fakeQuantize->clone_with_new_inputs({ data->output(outputIdx), inputLowConst_f32, @@ -179,6 +197,8 @@ std::shared_ptr FakeQuantizeTransformation::fuseElementwis foldConvert(fakeQuantize->input_value(3), deqPrecision), 
foldConvert(fakeQuantize->input_value(4), deqPrecision) })); + matcherPass->register_new_node(newFakeQuantize); + replace_node(fakeQuantize, newFakeQuantize); ngraph::copy_runtime_info({ fakeQuantize, eltwise }, newFakeQuantize); newFakeQuantize->set_friendly_name(fakeQuantize->get_friendly_name()); diff --git a/inference-engine/src/low_precision_transformations/src/fake_quantize_decomposition.cpp b/inference-engine/src/low_precision_transformations/src/fake_quantize_decomposition.cpp index b9d491238aac98..b522546c55e342 100644 --- a/inference-engine/src/low_precision_transformations/src/fake_quantize_decomposition.cpp +++ b/inference-engine/src/low_precision_transformations/src/fake_quantize_decomposition.cpp @@ -6,20 +6,252 @@ #include #include +#include +#include #include "low_precision/common/ie_lpt_exception.hpp" +#include "low_precision/rt_info/precisions_attribute.hpp" +#include "low_precision/rt_info/intervals_alignment_attribute.hpp" +#include "low_precision/rt_info/quantization_alignment_attribute.hpp" #include "low_precision/network_helper.hpp" namespace ngraph { namespace pass { namespace low_precision { -void FakeQuantizeDecompositionTransformation::registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const { - addSingleNodePattern(pass, context); +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::FakeQuantizeDecompositionTransformation, "FakeQuantizeDecompositionTransformation", 0); + +FakeQuantizeDecompositionTransformation::FakeQuantizeDecompositionTransformation(const Params& params) : LayerTransformation(params) { + auto matcher = pattern::wrap_type(); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; + + auto m = std::make_shared(matcher, "FakeQuantizeDecompositionTransformation"); + this->register_matcher(m, callback); +} + +namespace fq_decomposition { + +// get precision details, depends on: +// 1. FakeQuantize operation parameters (QuantizationDetails::getDetails & LayerTransformation::getPrecisionDetails) +// 2. Precisions on port +DataPrecision getDataPrecisionByOutputPortAndFakeQuantize(std::shared_ptr layer) { + const QuantizationDetails quantizationDetails = QuantizationDetails::getDetails(layer); + auto precisionsAttribute = getAttributeFromOutput>(layer->output(0)); + if (precisionsAttribute == nullptr) { + // TODO: explore this case in more details: + // 1. we should not be here + assert(true); + + // 2. 
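Note: the arithmetic fuseElementwise relies on is simple interval shifting — an elementwise op with a constant operand in front of a FakeQuantize can be absorbed into the FQ input range instead of staying in the graph. A tiny check of that identity (assumed values; the real pass does this on constants via foldConvert/fold):

    // FQ(x + b, il, ih, ...) == FQ(x, il - b, ih - b, ...)
    // FQ(x * s, il, ih, ...) == FQ(x, il / s, ih / s, ...) for s > 0
    #include <cassert>

    int main() {
        const float il = -1.f, ih = 1.f, b = 0.25f;
        // shifting the input by +b equals shifting both interval ends by -b
        assert(il - b == -1.25f);
        assert(ih - b == 0.75f);
        return 0;
    }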
+ +namespace fq_decomposition { + +// get precision details, depends on: +// 1. FakeQuantize operation parameters (QuantizationDetails::getDetails & LayerTransformation::getPrecisionDetails) +// 2. Precisions on port +DataPrecision getDataPrecisionByOutputPortAndFakeQuantize(std::shared_ptr<opset1::FakeQuantize> layer) { + const QuantizationDetails quantizationDetails = QuantizationDetails::getDetails(layer); + auto precisionsAttribute = getAttributeFromOutput<std::shared_ptr<PrecisionsAttribute>>(layer->output(0)); + if (precisionsAttribute == nullptr) { + // TODO: explore this case in more detail: + // 1. we should not be here + assert(false); + + // 2. not possible to get optimal precision by decomposed FakeQuantize + LayerTransformation::PrecisionDetails precisionDetailsAtOutputIntervals = LayerTransformation::getPrecisionDetails(quantizationDetails); + return DataPrecision( + precisionDetailsAtOutputIntervals.precision, + DataPrecision::getMinValue(precisionDetailsAtOutputIntervals.precision, quantizationDetails.levels), + DataPrecision::getMaxValue(precisionDetailsAtOutputIntervals.precision, quantizationDetails.levels), + precisionDetailsAtOutputIntervals.hasZeroPoint); + } + + const auto& precisions = precisionsAttribute->get()->sharedValue->precisions; + + ngraph::element::Type precision; + bool hasZeroPoint; + if (precisions.size() > 1ul) { + LayerTransformation::PrecisionDetails precisionDetailsAtOutputIntervals = LayerTransformation::getPrecisionDetails(quantizationDetails); + const auto foundIt = std::find(precisions.begin(), precisions.end(), precisionDetailsAtOutputIntervals.precision); + + if (foundIt == precisions.end()) { + precision = *precisions.begin(); + hasZeroPoint = true; + } else { + precision = precisionDetailsAtOutputIntervals.precision; + hasZeroPoint = precisionDetailsAtOutputIntervals.hasZeroPoint; + } + + // update shared attribute to affect all operations in subgraph + precisionsAttribute->get()->sharedValue->precisions = { precision }; + } else { + // use only available precision + precision = *precisions.begin(); + LayerTransformation::PrecisionDetails precisionDetailsAtOutputIntervals = LayerTransformation::getPrecisionDetails(quantizationDetails); + hasZeroPoint = precisionDetailsAtOutputIntervals.precision != precision; + } + + return DataPrecision( + precision, + DataPrecision::getMinValue(precision, quantizationDetails.levels), + DataPrecision::getMaxValue(precision, quantizationDetails.levels), + hasZeroPoint); +} + +// get precision details, depends on: +// 1. FakeQuantize operation parameters (QuantizationDetails::getDetails & LayerTransformation::getPrecisionDetails) +// 2. Precisions on port +DataPrecision getDataPrecisionByOutputPort(std::shared_ptr<opset1::FakeQuantize> layer) { + const size_t levels = layer->get_levels(); + const std::vector<float> outputLowValues = as_type_ptr<opset1::Constant>(layer->get_input_node_shared_ptr(3))->cast_vector<float>(); + const std::vector<float> outputHighValues = as_type_ptr<opset1::Constant>(layer->get_input_node_shared_ptr(4))->cast_vector<float>(); + + auto precisionsAttribute = getAttributeFromOutput<std::shared_ptr<PrecisionsAttribute>>(layer->output(0)); + if (precisionsAttribute == nullptr) { + // TODO: explore this case in more detail: + // 1. we should not be here + assert(false); + + // 2. 
not possible to get optimal precision by decomposed FakeQuantize + LayerTransformation::PrecisionDetails precisionDetailsAtOutputIntervals = LayerTransformation::getPrecisionDetails( + levels, + outputLowValues, + outputHighValues); + + return DataPrecision( + precisionDetailsAtOutputIntervals.precision, + DataPrecision::getMinValue(precisionDetailsAtOutputIntervals.precision, levels), + DataPrecision::getMaxValue(precisionDetailsAtOutputIntervals.precision, levels), + precisionDetailsAtOutputIntervals.hasZeroPoint); + } + + const auto& precisions = precisionsAttribute->get()->sharedValue->precisions; + + ngraph::element::Type precision; + bool hasZeroPoint; + if (precisions.size() > 1ul) { + LayerTransformation::PrecisionDetails precisionDetailsAtOutputIntervals = LayerTransformation::getPrecisionDetails( + levels, + outputLowValues, + outputHighValues); + const auto foundIt = std::find(precisions.begin(), precisions.end(), precisionDetailsAtOutputIntervals.precision); + + if (foundIt == precisions.end()) { + precision = *precisions.begin(); + hasZeroPoint = true; + } else { + precision = precisionDetailsAtOutputIntervals.precision; + hasZeroPoint = precisionDetailsAtOutputIntervals.hasZeroPoint; + } + + // update shared attribute to affect all operations in subgraph + precisionsAttribute->get()->sharedValue->precisions = { precision }; + } else { + // use only available precision + precision = *precisions.begin(); + LayerTransformation::PrecisionDetails precisionDetailsAtOutputIntervals = LayerTransformation::getPrecisionDetails( + levels, + outputLowValues, + outputHighValues); + hasZeroPoint = precisionDetailsAtOutputIntervals.precision != precision; + } + + return DataPrecision( + precision, + DataPrecision::getMinValue(precision, levels), + DataPrecision::getMaxValue(precision, levels), + hasZeroPoint); +} + +// TODO: LPT: refactor: use one way to decompose FakeQuantize +std::shared_ptr decomposeFakeQuantize( + MatcherPass* matcherPass, + std::shared_ptr& layer, + const std::shared_ptr& intervalsAlignment, + const DataPrecision& dataPrecision, + const bool updatePrecisions, + const element::Type deqPrecision) { + std::shared_ptr dequantize; + if (intervalsAlignment != nullptr) { + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::LPT_LT, "decomposeFakeQuantize1"); + const std::vector outputLowValues = as_type_ptr(layer->get_input_node_shared_ptr(3))->cast_vector(); + const std::vector outputHighValues = as_type_ptr(layer->get_input_node_shared_ptr(4))->cast_vector(); + + float dequantizationMul; + float dequantizationSub; + float updatedOutputLowValue; + float updatedOutputHighValue; + const size_t levels = NetworkHelper::calculateLevels( + dataPrecision.min, + dataPrecision.max, + intervalsAlignment->sharedValue->combinedInterval.low, + intervalsAlignment->sharedValue->combinedInterval.high, + outputLowValues[0], + outputHighValues[0], + dequantizationMul, + dequantizationSub, + updatedOutputLowValue, + updatedOutputHighValue); + + if ((updatePrecisions == false) && (dequantizationMul == 1.f) && (dequantizationSub == 0.f)) { + return nullptr; + } + + //TODO: pass min levels as a parameter? + if (levels < 2ul) { + return nullptr; + } + + // 2. update FakeQuantize - one time action + std::shared_ptr newFakeQuantizeLayer = ngraph::pass::low_precision::NetworkHelper::updateFakeQuantize( + layer, + updatePrecisions ? 
dataPrecision.precision : layer->get_output_element_type(0), + roundf(updatedOutputLowValue), + roundf(updatedOutputHighValue), + false); + matcherPass->register_new_node(newFakeQuantizeLayer); + newFakeQuantizeLayer->set_levels(levels); + + auto dequantization = ngraph::pass::low_precision::NetworkHelper::makeDequantization( + dequantizationMul, + dequantizationSub, + layer->get_output_element_type(0), + layer->get_output_partial_shape(0), + updatePrecisions ? dataPrecision.precision : layer->get_output_element_type(0), + deqPrecision, + newFakeQuantizeLayer); + + replace_node(layer, dequantization.multiply); + + std::vector> sourceNodes{ layer }; + std::vector> targetNodes{ newFakeQuantizeLayer, dequantization.multiply }; + if (dequantization.convert != nullptr) { + targetNodes.push_back(dequantization.convert); + } + if (dequantization.subtract != nullptr) { + targetNodes.push_back(dequantization.subtract); + } + NetworkHelper::copyInfo(sourceNodes, targetNodes); + + dequantize = dequantization.multiply; + } else { + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::LPT_LT, "decomposeFakeQuantize2"); + // Split FakeQuantize to two parts: Quantize and Dequantize + auto QDQ = NetworkHelper::decomposeFakeQuantize( + as_type_ptr(layer), + dataPrecision.precision, + dataPrecision.min, + dataPrecision.max, + dataPrecision.hasZeroPoint, + updatePrecisions); + + const auto newFakeQuantize = std::get<0>(QDQ); + if (newFakeQuantize == nullptr) { + return nullptr; + } + matcherPass->register_new_node(newFakeQuantize); + dequantize = std::get<1>(QDQ); + } + + return dequantize; } -bool FakeQuantizeDecompositionTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const { - std::shared_ptr layer = std::dynamic_pointer_cast(m.get_match_root()); +} // namespace fq_decomposition + +bool FakeQuantizeDecompositionTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher& m) { + auto layer = as_type_ptr(m.get_match_root()); if (!NetworkHelper::isQuantizeSupported(layer)) { return false; } @@ -30,59 +262,24 @@ bool FakeQuantizeDecompositionTransformation::transform(TransformationContext& c layer = NetworkHelper::fuseConvert(layer); if (NetworkHelper::isConstantPath(layer)) { - // fold fq if constant just before fq and child layers aren't supported in LPT - if (as_type(layer->get_input_node_ptr(0))) { - bool nextOpearionsWillBeNotHandled = true; - for (auto output : layer->outputs()) { - for (auto input : output.get_target_inputs()) { - const auto node = input.get_node(); - - if (as_type(node)) { - for (const auto& child : NetworkHelper::consumers(node->shared_from_this())) { - if ((as_type_ptr(child)) && - (paramsManager->getPrecisionsOnActivations(*child).size() != 0ul)) { - nextOpearionsWillBeNotHandled = false; - break; - } - } - } - - if (paramsManager->getPrecisionsOnActivations(*input.get_node()).size() != 0ul) { - nextOpearionsWillBeNotHandled = false; - break; - } - } - - if (!nextOpearionsWillBeNotHandled) { - break; - } - } + return false; + } - if (nextOpearionsWillBeNotHandled) { - const std::shared_ptr resultConstant = NetworkHelper::fold_fake_quantize(layer); - if (as_type_ptr(resultConstant)) { - replace_node(layer, resultConstant); - return true; - } - } - } + auto attribute = getAttributeFromOutput>(layer->output(0)); + if ((attribute == nullptr) || (attribute->get()->sharedValue->precisions.empty())) { return false; } - const ngraph::element::Type precision = layer->get_output_element_type(0); - if (DataPrecision::isSupported(precision)) 
{ - const QuantizationDetails quantizationDetails = QuantizationDetails::getDetails(layer); + const ngraph::element::Type outputPrecision = layer->get_output_element_type(0); + if (DataPrecision::isSupported(outputPrecision)) { const FakeQuantizeDequantization dequantization = NetworkHelper::getDequantizationBelow(layer); if (dequantization.empty()) { return false; } - const DataPrecision expectedDataPrecision = getDataPrecision(dequantization.multiply, quantizationDetails, false); - if (expectedDataPrecision.precision == element::undefined) { - return false; - } - - if (expectedDataPrecision.precision == precision) { + const DataPrecision expectedDataPrecision = fq_decomposition::getDataPrecisionByOutputPortAndFakeQuantize(layer); + // TODO: need test to compose FakeQuantize + if ((expectedDataPrecision.precision == element::undefined) || (expectedDataPrecision.precision == outputPrecision)) { return false; } @@ -92,76 +289,122 @@ bool FakeQuantizeDecompositionTransformation::transform(TransformationContext& c } } - if (as_type(layer->get_input_node_ptr(0))) { - bool nextOpearionsWillBeNotHandled = true; - for (auto output : layer->outputs()) { - for (auto input : output.get_target_inputs()) { - auto activations = paramsManager->getPrecisionsOnActivations(*input.get_node()); - if (paramsManager->getPrecisionsOnActivations(*input.get_node()).size() != 0ul) { - nextOpearionsWillBeNotHandled = false; - break; - } - } + if (!QuantizationDetails::outputLayoutIsSupported(layer)) { + return false; + } - if (!nextOpearionsWillBeNotHandled) { - break; - } + if (!QuantizationDetails::isSupportedLevel(layer->get_levels())) { + return false; + } + + DataPrecision dataPrecision = fq_decomposition::getDataPrecisionByOutputPort(layer); + + std::shared_ptr precisionsAttribute; + { + // TODO: LPT: return attribute (not wrapper) + auto attributeWrapper = getAttributeFromOutput>(layer->output(0)); + if (attributeWrapper == nullptr) { + THROW_IE_LPT_EXCEPTION(*layer) << "PrecisionAttribute is absent"; } + precisionsAttribute = attributeWrapper->get(); + if (precisionsAttribute == nullptr) { + THROW_IE_LPT_EXCEPTION(*layer) << "PrecisionAttribute is absent"; + } + } - if (nextOpearionsWillBeNotHandled) { - const std::shared_ptr resultConstant = NetworkHelper::fold_fake_quantize(layer); - if (as_type_ptr(resultConstant)) { - replace_node(layer, resultConstant); - return true; + std::shared_ptr quantizationAlignment; + for (const auto& input : layer->output(0).get_target_inputs()) { + const auto alignmentValueWrapper = low_precision::getAttribute>(input.get_node()->shared_from_this()); + if (alignmentValueWrapper != nullptr) { + quantizationAlignment = alignmentValueWrapper->get(); + if (quantizationAlignment->sharedValue->value) { + break; } } } - if (!QuantizationDetails::outputLayoutIsSupported(layer)) { - return false; + std::shared_ptr intervalsAlignment; + { + if ((quantizationAlignment != nullptr) && quantizationAlignment->sharedValue->value) { + auto intervalsAlignmentWrapper = low_precision::getAttribute>(layer); + if (intervalsAlignmentWrapper != nullptr) { + intervalsAlignment = intervalsAlignmentWrapper->get(); + } + } } - if (!QuantizationDetails::isSupportedLevel(layer->get_levels())) { + // FakeQuantize operations are combined in supported cascade (per tensor quantization) + if ((intervalsAlignment != nullptr) && (intervalsAlignment->sharedValue->minLevels <= 2ul)) { return false; } - const QuantizationDetails quantizationDetails = QuantizationDetails::getDetails(layer); - const DataPrecision 
dataPrecision = getDataPrecision(layer, quantizationDetails, false); + // if IntervalsAlignment attribute is defined then, the attribute defines decomposition parameters, + // if IntervalsAlignment attribute is not defined, then FakeQuantize operation intervals define decomposition parameters if (dataPrecision.precision == element::undefined) { - return false; - } + element::Type precision; + const auto levels = layer->get_levels(); + const std::vector outputLowValues = as_type_ptr(layer->get_input_node_shared_ptr(3))->cast_vector(); + const std::vector outputHighValues = as_type_ptr(layer->get_input_node_shared_ptr(4))->cast_vector(); + if (intervalsAlignment == nullptr) { + // define precision by FakeQuantize intervals + LayerTransformation::PrecisionDetails precisionDetailsAtOutputIntervals = LayerTransformation::getPrecisionDetails( + levels, + outputLowValues, + outputHighValues); + const auto foundIt = std::find( + precisionsAttribute->sharedValue->precisions.begin(), + precisionsAttribute->sharedValue->precisions.end(), + precisionDetailsAtOutputIntervals.precision); - // Split FakeQuantize to two parts: Quantize and Dequantize - auto QDQ = NetworkHelper::decomposeFakeQuantize( - as_type_ptr(layer), - dataPrecision.precision, - dataPrecision.min, - dataPrecision.max, - dataPrecision.hasZeroPoint, - updatePrecisions); + bool hasZeroPoint; + if (foundIt == precisionsAttribute->sharedValue->precisions.end()) { + precision = *precisionsAttribute->sharedValue->precisions.begin(); + hasZeroPoint = true; + } else { + precision = precisionDetailsAtOutputIntervals.precision; + hasZeroPoint = precisionDetailsAtOutputIntervals.hasZeroPoint; + } -#ifdef LPT_PRINT_DEQUANTIZATION_INFO - { - const std::shared_ptr multiply = as_type_ptr(std::get<1>(QDQ)); - const std::shared_ptr multiplyConst = as_type_ptr(multiply->get_input_node_shared_ptr(1)); - const std::vector dequantizationScales = multiplyConst->cast_vector(); - - const std::shared_ptr subtract = as_type_ptr(multiply->get_input_node_shared_ptr(0)); - std::vector dequantizationShifts; - if (subtract != nullptr) { - const std::shared_ptr subtractConst = as_type_ptr(subtract->get_input_node_shared_ptr(1)); - dequantizationShifts = subtractConst->cast_vector(); + dataPrecision = DataPrecision( + precision, + DataPrecision::getMinValue(precision, levels), + DataPrecision::getMaxValue(precision, levels), + hasZeroPoint); } else { - dequantizationShifts = std::vector(dequantizationScales.size()); + // define precision by attribute + if (intervalsAlignment->sharedValue->preferablePrecisions.empty()) { + // TODO: LPT: add user defined preferredPrecision + precision = *precisionsAttribute->sharedValue->precisions.begin(); + } else { + // TODO: LPT: add user defined preferredPrecision + precision = *intervalsAlignment->sharedValue->preferablePrecisions.begin(); + } + + dataPrecision = DataPrecision( + precision, + DataPrecision::getMinValue(precision, levels), + DataPrecision::getMaxValue(precision, levels), + LayerTransformation::getPrecisionDetails(levels, outputLowValues, outputHighValues).precision != precision); } + } - printDequantizationValues(dequantizationScales, dequantizationShifts); + std::shared_ptr dequantize = fq_decomposition::decomposeFakeQuantize( + this, + layer, + intervalsAlignment, + dataPrecision, + updatePrecisions, + deqPrecision); + if (dequantize == nullptr) { + return false; } -#endif - std::shared_ptr dequantize = std::get<1>(QDQ); updateOutput(context, dequantize, layer); + if 
(precisionsAttribute->sharedValue->precisions.size() != 1ul) { + precisionsAttribute->sharedValue->precisions = { dataPrecision.precision }; + } + return true; } diff --git a/inference-engine/src/low_precision_transformations/src/fold_convert.cpp b/inference-engine/src/low_precision_transformations/src/fold_convert.cpp index 091380442b8244..5e673a1ef512f4 100644 --- a/inference-engine/src/low_precision_transformations/src/fold_convert.cpp +++ b/inference-engine/src/low_precision_transformations/src/fold_convert.cpp @@ -5,18 +5,32 @@ #include "low_precision/fold_convert.hpp" #include #include -#include "low_precision/fake_quantize.hpp" +#include + #include "low_precision/network_helper.hpp" namespace ngraph { namespace pass { namespace low_precision { -void FoldConvertTransformation::registerMatcherIn(GraphRewrite &pass, TransformationContext &context) const { - addSingleNodePattern(pass, context); +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::FoldConvertTransformation, "FoldConvertTransformation", 0); + +FoldConvertTransformation::FoldConvertTransformation(const Params& params) : LayerTransformation(params) { + auto subtract = pattern::wrap_type(); + auto matcher = std::make_shared(subtract, "FoldConvertTransformation"); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; + + this->register_matcher(matcher, callback); } -bool FoldConvertTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const { +bool FoldConvertTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) { const auto subtract = m.get_match_root(); if (!canBeTransformed(context, subtract)) { return false; diff --git a/inference-engine/src/low_precision_transformations/src/fold_fake_quantize.cpp b/inference-engine/src/low_precision_transformations/src/fold_fake_quantize.cpp new file mode 100644 index 00000000000000..7984d946f865ac --- /dev/null +++ b/inference-engine/src/low_precision_transformations/src/fold_fake_quantize.cpp @@ -0,0 +1,72 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "low_precision/fold_fake_quantize.hpp" + +#include +#include +#include + +#include +#include "low_precision/network_helper.hpp" + +namespace ngraph { +namespace pass { +namespace low_precision { + +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::FoldFakeQuantizeTransformation, "FoldFakeQuantizeTransformation", 0); + +FoldFakeQuantizeTransformation::FoldFakeQuantizeTransformation(const Params& params) : LayerTransformation(params) { + auto fakeQuantize = pattern::wrap_type(); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; + + auto m = std::make_shared(fakeQuantize, "FoldFakeQuantizeTransformation"); + this->register_matcher(m, callback); +} + +bool FoldFakeQuantizeTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) { + const auto fakeQuantize = as_type_ptr(m.get_match_root()); + if (fakeQuantize == nullptr) { + return false; + } + + if (!canBeTransformed(context, fakeQuantize)) { + return false; + } + + const auto constantShape = fakeQuantize->input(1).get_partial_shape(); + if (constantShape.is_dynamic() || constantShape.rank().is_dynamic()) { + return false; + } + + std::shared_ptr 
resultConstant = NetworkHelper::fold_fake_quantize( + fakeQuantize, + false, + (constantShape.rank().get_length() < 2) || constantShape[1] != 1ul ? 1ul : 0ul); + if (is_type(resultConstant)) { + replace_node(fakeQuantize, resultConstant); + return true; + } + + return false; +} + +bool FoldFakeQuantizeTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr op) const { + return NetworkHelper::isConstantPath(op); +} + +bool FoldFakeQuantizeTransformation::isPrecisionPreserved(std::shared_ptr layer) const noexcept { + return false; +} + +} // namespace low_precision +} // namespace pass +} // namespace ngraph diff --git a/inference-engine/src/low_precision_transformations/src/fuse_convert.cpp b/inference-engine/src/low_precision_transformations/src/fuse_convert.cpp index 38aa2133940308..48fbea0211946a 100644 --- a/inference-engine/src/low_precision_transformations/src/fuse_convert.cpp +++ b/inference-engine/src/low_precision_transformations/src/fuse_convert.cpp @@ -5,9 +5,11 @@ #include "low_precision/fuse_convert.hpp" #include -#include #include +#include +#include + #include "low_precision/common/ie_lpt_exception.hpp" #include "low_precision/network_helper.hpp" @@ -15,21 +17,25 @@ namespace ngraph { namespace pass { namespace low_precision { -void FuseConvertTransformation::registerMatcherIn(GraphRewrite &pass, TransformationContext &context) const { - addPattern( - pass, - context, - make_op_pattern({ make_op_label(), make_op_label() })); - - addPattern( - pass, - context, - make_op_pattern({ make_op_label(), make_op_label() })); - - addPattern( - pass, - context, - make_op_pattern({ make_op_label(), make_op_label() })); +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::FuseConvertTransformation, "FuseConvertTransformation", 0); + +FuseConvertTransformation::FuseConvertTransformation(const Params& params) : LayerTransformation(params) { + auto multiply = pattern::wrap_type({ pattern::wrap_type(), pattern::wrap_type() }); + auto subtract = pattern::wrap_type({ pattern::wrap_type(), pattern::wrap_type() }); + auto add = pattern::wrap_type({ pattern::wrap_type(), pattern::wrap_type() }); + auto matcher = std::make_shared( + std::make_shared(OutputVector{ multiply, subtract, add }), + "FuseConvertTransformation"); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; + + this->register_matcher(matcher, callback); } std::shared_ptr removeConvertIfPossibleForSubtract( @@ -50,7 +56,7 @@ std::shared_ptr removeConvertIfPossibleForSubtract( return newSubtract; } -bool FuseConvertTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const { +bool FuseConvertTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) { const auto op = m.get_match_root(); if (!canBeTransformed(context, op)) { return false; @@ -84,10 +90,13 @@ bool FuseConvertTransformation::transform(TransformationContext& context, ngraph replace_node(op, newOp); } - if (newOp != nullptr) { - ngraph::copy_runtime_info({ convert, op }, newOp); - newOp->set_friendly_name(op->get_friendly_name()); + if (newOp == nullptr) { + return false; } + + ngraph::copy_runtime_info({ convert, op }, newOp); + newOp->set_friendly_name(op->get_friendly_name()); + register_new_node(newOp); } return true; diff --git a/inference-engine/src/low_precision_transformations/src/fuse_fake_quantize.cpp 
b/inference-engine/src/low_precision_transformations/src/fuse_fake_quantize.cpp index 6ef45c0b6cae2c..b15b466b4761c0 100644 --- a/inference-engine/src/low_precision_transformations/src/fuse_fake_quantize.cpp +++ b/inference-engine/src/low_precision_transformations/src/fuse_fake_quantize.cpp @@ -5,6 +5,7 @@ #include "low_precision/fuse_fake_quantize.hpp" #include #include +#include #include "low_precision/common/ie_lpt_exception.hpp" #include "low_precision/network_helper.hpp" @@ -12,11 +13,24 @@ namespace ngraph { namespace pass { namespace low_precision { -void FuseFakeQuantizeTransformation::registerMatcherIn(GraphRewrite &pass, TransformationContext &context) const { - addSingleNodePattern(pass, context); +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::FuseFakeQuantizeTransformation, "FuseFakeQuantizeTransformation", 0); + +FuseFakeQuantizeTransformation::FuseFakeQuantizeTransformation(const Params& params) : LayerTransformation(params) { + auto matcher = pattern::wrap_type(); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; + + auto m = std::make_shared(matcher, "FuseFakeQuantizeTransformation"); + this->register_matcher(m, callback); } -bool FuseFakeQuantizeTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const { +bool FuseFakeQuantizeTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) { std::shared_ptr fakeQuantize = as_type_ptr(m.get_match_root()); do { fakeQuantize = handle(context, fakeQuantize); diff --git a/inference-engine/src/low_precision_transformations/src/fuse_multiply_to_fake_quantize.cpp b/inference-engine/src/low_precision_transformations/src/fuse_multiply_to_fake_quantize.cpp index 734d9abec435ec..ccff4188d3a5c1 100644 --- a/inference-engine/src/low_precision_transformations/src/fuse_multiply_to_fake_quantize.cpp +++ b/inference-engine/src/low_precision_transformations/src/fuse_multiply_to_fake_quantize.cpp @@ -5,6 +5,8 @@ #include "low_precision/fuse_multiply_to_fake_quantize.hpp" #include #include +#include +#include "low_precision/rt_info/intervals_alignment_attribute.hpp" #include "low_precision/fake_quantize.hpp" #include "low_precision/network_helper.hpp" @@ -12,11 +14,24 @@ namespace ngraph { namespace pass { namespace low_precision { -void FuseMultiplyToFakeQuantizeTransformation::registerMatcherIn(GraphRewrite &pass, TransformationContext &context) const { - addSingleNodePattern(pass, context); +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::FuseMultiplyToFakeQuantizeTransformation, "FuseMultiplyToFakeQuantizeTransformation", 0); + +FuseMultiplyToFakeQuantizeTransformation::FuseMultiplyToFakeQuantizeTransformation(const Params& params) : LayerTransformation(params) { + auto matcher = pattern::wrap_type(); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; + + auto m = std::make_shared(matcher, "FuseMultiplyToFakeQuantizeTransformation"); + this->register_matcher(m, callback); } -bool FuseMultiplyToFakeQuantizeTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const { +bool FuseMultiplyToFakeQuantizeTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) { const auto multiply = m.get_match_root(); if 
(!canBeTransformed(context, multiply)) { return false; @@ -65,6 +80,11 @@ bool FuseMultiplyToFakeQuantizeTransformation::transform(TransformationContext& replace_node(multiply, newFakeQuantize); NetworkHelper::copyInfo(fakeQuantize, newFakeQuantize); + const auto intervalAlignment = getAttribute(fakeQuantize); + if ((intervalAlignment != nullptr) && (intervalAlignment->get()->levels != 0ul)) { + newFakeQuantize->set_levels(intervalAlignment->get()->levels); + } + updateOutput(context, newFakeQuantize, multiply); return true; } diff --git a/inference-engine/src/low_precision_transformations/src/fuse_subtract_to_fake_quantize.cpp b/inference-engine/src/low_precision_transformations/src/fuse_subtract_to_fake_quantize.cpp index 8d8d9968802e44..b8ec9b192fd272 100644 --- a/inference-engine/src/low_precision_transformations/src/fuse_subtract_to_fake_quantize.cpp +++ b/inference-engine/src/low_precision_transformations/src/fuse_subtract_to_fake_quantize.cpp @@ -5,6 +5,7 @@ #include "low_precision/fuse_subtract_to_fake_quantize.hpp" #include #include +#include #include "low_precision/fake_quantize.hpp" #include "low_precision/network_helper.hpp" @@ -12,11 +13,24 @@ namespace ngraph { namespace pass { namespace low_precision { -void FuseSubtractToFakeQuantizeTransformation::registerMatcherIn(GraphRewrite &pass, TransformationContext &context) const { - addSingleNodePattern(pass, context); +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::FuseSubtractToFakeQuantizeTransformation, "FuseSubtractToFakeQuantizeTransformation", 0); + +FuseSubtractToFakeQuantizeTransformation::FuseSubtractToFakeQuantizeTransformation(const Params& params) : LayerTransformation(params) { + auto matcher = pattern::wrap_type(); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; + + auto m = std::make_shared(matcher, "FuseSubtractToFakeQuantizeTransformation"); + this->register_matcher(m, callback); } -bool FuseSubtractToFakeQuantizeTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const { +bool FuseSubtractToFakeQuantizeTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) { const auto subtract = m.get_match_root(); if (!canBeTransformed(context, subtract)) { return false; diff --git a/inference-engine/src/low_precision_transformations/src/group_convolution.cpp b/inference-engine/src/low_precision_transformations/src/group_convolution.cpp index 8dd7b0b1ce727e..42d9600d13c7a0 100644 --- a/inference-engine/src/low_precision_transformations/src/group_convolution.cpp +++ b/inference-engine/src/low_precision_transformations/src/group_convolution.cpp @@ -8,24 +8,35 @@ #include #include +#include #include "low_precision/network_helper.hpp" namespace ngraph { namespace pass { namespace low_precision { -GroupConvolutionTransformation::GroupConvolutionTransformation(const Params& params) : ConvolutionTransformation(params) { -} +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::GroupConvolutionTransformation, "GroupConvolutionTransformation", 0); -void GroupConvolutionTransformation::registerMatcherIn(GraphRewrite &pass, TransformationContext &context) const { - addSingleNodePattern(pass, context); +GroupConvolutionTransformation::GroupConvolutionTransformation(const Params& params) : ConvolutionTransformation(params) { + auto matcher = pattern::wrap_type(); + + ngraph::graph_rewrite_callback callback = 
[this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; + + auto m = std::make_shared(matcher, "GroupConvolutionTransformation"); + this->register_matcher(m, callback); } -bool GroupConvolutionTransformation::isQuantized(std::shared_ptr layer) const noexcept { - return WeightableLayerTransformation::isQuantized(layer, true); +bool GroupConvolutionTransformation::isQuantized(const std::shared_ptr& layer) const noexcept { + return GroupConvolutionTransformation::isQuantizedStatic(layer); } -bool GroupConvolutionTransformation::transform(TransformationContext &context, ngraph::pattern::Matcher &m) const { +bool GroupConvolutionTransformation::transform(TransformationContext &context, ngraph::pattern::Matcher &m) { auto convolution = m.get_match_root(); if (!GroupConvolutionTransformation::canBeTransformed(context, convolution)) { @@ -36,6 +47,10 @@ bool GroupConvolutionTransformation::transform(TransformationContext &context, n return true; } +bool GroupConvolutionTransformation::isQuantizedStatic(const std::shared_ptr& layer) noexcept { + return WeightableLayerTransformation::isQuantizedStatic(layer, true); +} + } // namespace low_precision } // namespace pass } // namespace ngraph diff --git a/inference-engine/src/low_precision_transformations/src/interpolate.cpp b/inference-engine/src/low_precision_transformations/src/interpolate.cpp index 66aba3fc7c429f..b8538bfd14b5d1 100644 --- a/inference-engine/src/low_precision_transformations/src/interpolate.cpp +++ b/inference-engine/src/low_precision_transformations/src/interpolate.cpp @@ -9,30 +9,50 @@ #include #include +#include +#include #include "low_precision/network_helper.hpp" using namespace ngraph; using namespace ngraph::pass; using namespace ngraph::pass::low_precision; -void InterpolateTransformation::registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const { - addPattern( - pass, - context, - make_op_pattern({ make_op_label(), make_op_label() })); - addPattern( - pass, - context, - make_op_pattern({ make_op_label(), make_op_label(), - make_op_label(), make_op_label() })); - addPattern( - pass, - context, - make_op_pattern({ make_op_label(), make_op_label(), - make_op_label() })); +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::InterpolateTransformation, "InterpolateTransformation", 0); + +InterpolateTransformation::InterpolateTransformation(const Params& params) : LayerTransformation(params) { + auto mul = pattern::wrap_type(); + + auto interpolate1 = pattern::wrap_type({ + mul, + pattern::wrap_type() }); + + auto interpolate4 = pattern::wrap_type({ + mul, + pattern::wrap_type(), + pattern::wrap_type() }); + + auto interpolate4_2 = pattern::wrap_type({ + mul, + pattern::wrap_type(), + pattern::wrap_type(), + pattern::wrap_type() }); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; + + auto matcher = std::make_shared( + std::make_shared(OutputVector{ interpolate1, interpolate4, interpolate4_2 }), + "InterpolateTransformation"); + + this->register_matcher(matcher, callback); } -bool InterpolateTransformation::transform(TransformationContext &context, ngraph::pattern::Matcher &m) const { +bool InterpolateTransformation::transform(TransformationContext &context, ngraph::pattern::Matcher &m) { std::shared_ptr interpolate = m.get_match_root(); if 
(!canBeTransformed(context, m.get_match_root())) { return false; diff --git a/inference-engine/src/low_precision_transformations/src/layer_transformation.cpp b/inference-engine/src/low_precision_transformations/src/layer_transformation.cpp index d1dc736e536ed4..14d21fa29b67c3 100644 --- a/inference-engine/src/low_precision_transformations/src/layer_transformation.cpp +++ b/inference-engine/src/low_precision_transformations/src/layer_transformation.cpp @@ -25,50 +25,16 @@ const char LayerTransformation::originalLayerPostfix[] = "_original"; LayerTransformation::LayerTransformation(const Params& params) : updatePrecisions(params.updatePrecisions), - quantizedTensorAlignmentOnActivations(params.quantizedTensorAlignmentOnActivations), - quantizedTensorAlignmentOnWeights(params.quantizedTensorAlignmentOnWeights), - supportAsymmetricQuantization(params.supportAsymmetricQuantization), - precisionsOnActivations(params.precisionsOnActivations), - precisionsOnWeights(params.precisionsOnWeights), - deqPrecision(params.deqPrecision), - support3DTensorOnActivations(params.support3DTensorOnActivations), - deconvolutionSpecificChannelsRatio(params.deconvolutionSpecificChannelsRatio), - quantizationIntervalAsymmetryThreshold(0.002f), - zeroThreshold(1.e-6f), - minQuantizationLevels(2ul), - paramsManager(nullptr), - layerTransformationsManager(nullptr) {} - -void LayerTransformation::setParamsManager(IParamsManager* paramsManager) noexcept { - this->paramsManager = paramsManager; -} + deqPrecision(params.deqPrecision) {} -void LayerTransformation::setLayerTransformationsManager(ILayerTransformationsManager* layerTransformationsManager) noexcept { - this->layerTransformationsManager = layerTransformationsManager; +void LayerTransformation::setContext(TransformationContext* context) noexcept { + this->context = context; } void LayerTransformation::setUpdatePrecisions(const bool updatePrecisions) { this->updatePrecisions = updatePrecisions; } -void LayerTransformation::setQuantizedTensorAlignmentOnActivations( - const QuantizedTensorAlignment quantizedTensorAlignmentOnActivations) { - this->quantizedTensorAlignmentOnActivations = quantizedTensorAlignmentOnActivations; -} - -void LayerTransformation::setQuantizedTensorAlignmentOnWeights( - const QuantizedTensorAlignment quantizedTensorAlignmentOnWeights) { - this->quantizedTensorAlignmentOnWeights = quantizedTensorAlignmentOnWeights; -} - -const std::vector& LayerTransformation::getPrecisionsOnActivations() const { - return precisionsOnActivations; -} - -const std::vector& LayerTransformation::getPrecisionsOnWeights() const { - return precisionsOnWeights; -} - bool LayerTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr layer) const { if (!isQuantized(layer)) { return false; @@ -78,6 +44,10 @@ bool LayerTransformation::canBeTransformed(const TransformationContext& context, return false; } + return canBeTransformedStatic(layer); +} + +bool LayerTransformation::canBeTransformedStatic(const std::shared_ptr& layer) { for (const auto& output : layer->outputs()) { const auto rank = output.get_partial_shape().rank(); if (rank.is_dynamic()) { @@ -120,13 +90,13 @@ bool LayerTransformation::canBeTransformed(const TransformationContext& context, if ((dequantization.subtract != nullptr) && (!perChannelQuantization( dequantization.subtract->get_output_partial_shape(0), - dequantization.subtract->get_input_shape(1)))) { + dequantization.subtractConstant->get_shape()))) { return false; } if ((dequantization.multiply != nullptr) && 
(!perChannelQuantization( dequantization.multiply->get_output_partial_shape(0), - dequantization.multiply->get_input_shape(1)))) { + dequantization.multiplyConstant->get_shape()))) { return false; } } @@ -158,19 +128,11 @@ bool LayerTransformation::canBeTransformedSpatialDimension(const TransformationC return true; } -bool LayerTransformation::canSubtractBeHandled(const std::shared_ptr& op, const size_t parentIndex) const { - return canSubtractBeHandled(op, NetworkHelper::getDequantization(op, parentIndex)); -} - bool LayerTransformation::canSubtractBeHandled(const std::shared_ptr& op, const FakeQuantizeDequantization& dequantization) const { if (dequantization.empty() || (dequantization.subtract == nullptr)) { return true; } - if (!supportAsymmetricQuantization) { - return false; - } - if (!updatePrecisions) { return true; } @@ -229,36 +191,31 @@ void LayerTransformation::printDequantizationValues( } #endif -void LayerTransformation::setQuantizationIntervalAsymmetryThreshold(const float value) { - this->quantizationIntervalAsymmetryThreshold = value; -} +LayerTransformation::PrecisionDetails LayerTransformation::getPrecisionDetails( + const size_t quantizationLevels, + const std::vector& outputLowValues, + const std::vector& outputHighValues) { + // TODO: workaround: hardcoded values + const float zeroThreshold = 1.e-6f; + const float quantizationIntervalAsymmetryThreshold = 0.002f; -void LayerTransformation::setZeroThreshold(const float value) { - this->zeroThreshold = value; -} - -void LayerTransformation::setMinQuantizationLevels(const size_t levels) { - this->minQuantizationLevels = levels; -} - -LayerTransformation::PrecisionDetails LayerTransformation::getPrecisionDetails(const QuantizationDetails& quantizationDetails) const { const float asymmetricIntervalSideRatio256 = -128.f / 127.f; bool hasNegative = false; bool signedPrecision = true; bool unsignedPrecision = true; bool hasZeroPoint = false; - for (size_t i = 0; i < quantizationDetails.outputLowValues.size(); ++i) { - const bool signedInterval = std::signbit(quantizationDetails.outputLowValues[i]) != std::signbit(quantizationDetails.outputHighValues[i]); - const bool outputLowValueIsNotZero = std::fabs(quantizationDetails.outputLowValues[i]) >= zeroThreshold; + for (size_t i = 0; i < outputLowValues.size(); ++i) { + const bool signedInterval = std::signbit(outputLowValues[i]) != std::signbit(outputHighValues[i]); + const bool outputLowValueIsNotZero = std::fabs(outputLowValues[i]) >= zeroThreshold; if (signedInterval && outputLowValueIsNotZero) { // signed unsignedPrecision = false; hasNegative = true; - if (quantizationDetails.outputHighValues[i] != 0.f) { - const float expectedRatio = quantizationDetails.levels == 256 ? asymmetricIntervalSideRatio256 : -1.f; - const float actualRatio = quantizationDetails.outputLowValues[i] / quantizationDetails.outputHighValues[i]; + if (outputHighValues[i] != 0.f) { + const float expectedRatio = quantizationLevels == 256 ? 
asymmetricIntervalSideRatio256 : -1.f; + const float actualRatio = outputLowValues[i] / outputHighValues[i]; const float actual = std::fabs((actualRatio - expectedRatio) / std::min(actualRatio, expectedRatio)); if (actual > quantizationIntervalAsymmetryThreshold) { hasZeroPoint = true; @@ -291,6 +248,17 @@ LayerTransformation::PrecisionDetails LayerTransformation::getPrecisionDetails(c } } + // TODO: use this implementation after merge <= not aligned with master +// if (signedPrecision && (!unsignedPrecision)) { +// return LayerTransformation::PrecisionDetails(element::i8, hasNegative, hasZeroPoint); +// } +// +// if ((!signedPrecision) && unsignedPrecision) { +// return LayerTransformation::PrecisionDetails(element::u8, hasNegative, hasZeroPoint); +// } +// +// THROW_TRANSFORMATION_EXCEPTION << "unexpected interval"; + if (!hasZeroPoint) { if (signedPrecision && (!unsignedPrecision)) { return LayerTransformation::PrecisionDetails(element::i8, hasNegative, hasZeroPoint); @@ -304,135 +272,51 @@ LayerTransformation::PrecisionDetails LayerTransformation::getPrecisionDetails(c return LayerTransformation::PrecisionDetails(element::undefined, hasNegative, hasZeroPoint); } -bool LayerTransformation::isQuantized(std::shared_ptr layer) const noexcept { +LayerTransformation::PrecisionDetails LayerTransformation::getPrecisionDetails(const QuantizationDetails& quantizationDetails) { + return getPrecisionDetails(quantizationDetails.levels, quantizationDetails.outputLowValues, quantizationDetails.outputHighValues); +} + +bool LayerTransformation::isAsymmetricQuantization(const std::shared_ptr& layer) { + const auto nonConstNode = const_cast(layer.get())->shared_from_this(); + const auto dequantization = NetworkHelper::getDequantization(nonConstNode); + return dequantization.subtract != nullptr; +} + +bool LayerTransformation::isQuantized(const std::shared_ptr& layer) const noexcept { return true; } DataPrecision LayerTransformation::getDataPrecision( - std::shared_ptr layer, + const std::shared_ptr& layer, const QuantizationDetails& quantizationDetails, - const bool onWeights) const { + const std::vector& precisions) { #ifdef LPT_PRINT_DEQUANTIZATION_INFO printDequantizationInfo(layer); #endif - std::vector precisions = onWeights ? precisionsOnWeights : precisionsOnActivations; PrecisionDetails precisionDetailsAtOutputIntervals = getPrecisionDetails(quantizationDetails); - { - if (precisionDetailsAtOutputIntervals.precision != element::undefined) { - if (!onWeights) { - fillAvailablePrecisions(layer, precisions); - } - - // if supportedPrecisions is empty then use the first available, not supported layer will be in original precision - if (!precisions.empty()) { - const auto foundIt = std::find(precisions.begin(), precisions.end(), precisionDetailsAtOutputIntervals.precision); - const element::Type resultPrecision = foundIt != precisions.end() ? - precisionDetailsAtOutputIntervals.precision : - *precisions.begin(); - const DataPrecision dataPrecision( - resultPrecision, - DataPrecision::getMinValue(resultPrecision, quantizationDetails.levels), - DataPrecision::getMaxValue(resultPrecision, quantizationDetails.levels), - foundIt != precisions.end() ? 
precisionDetailsAtOutputIntervals.hasZeroPoint : true); - -#ifdef LPT_PRINT_DEQUANTIZATION_INFO - printDequantizationInfo(dataPrecision); -#endif - return dataPrecision; - } + if (precisionDetailsAtOutputIntervals.precision != element::undefined) { + // if supportedPrecisions is empty then use the first available, not supported layer will be in original precision + if (!precisions.empty()) { + const auto foundIt = std::find(precisions.begin(), precisions.end(), precisionDetailsAtOutputIntervals.precision); + const element::Type resultPrecision = foundIt != precisions.end() ? + precisionDetailsAtOutputIntervals.precision : + *precisions.begin(); + + const DataPrecision dataPrecision( + resultPrecision, + DataPrecision::getMinValue(resultPrecision, quantizationDetails.levels), + DataPrecision::getMaxValue(resultPrecision, quantizationDetails.levels), + foundIt != precisions.end() ? precisionDetailsAtOutputIntervals.hasZeroPoint : true); + + return dataPrecision; } } - - const DataPrecision dataPrecision = precisions.empty() ? - DataPrecision(element::undefined, 0.f, 0.f, false) : - DataPrecision( - *precisions.begin(), - DataPrecision::getMinValue(*precisions.begin(), quantizationDetails.levels), - DataPrecision::getMaxValue(*precisions.begin(), quantizationDetails.levels), - true); -#ifdef LPT_PRINT_DEQUANTIZATION_INFO - printDequantizationInfo(dataPrecision); -#endif - return dataPrecision; -} - -void LayerTransformation::fillAvailablePrecisions(std::shared_ptr layer, std::vector& availablePrecisions) const { - if (availablePrecisions.empty()) { - return; - } - - const std::vector> children = NetworkHelper::consumers(layer); - for (auto child : children) { - if (child->get_type_info().is_castable(opset1::FakeQuantize::get_type_info_static())) { - // FakeQuantize layer updates precision - continue; - } - - if (!layerTransformationsManager->isQuantized(child)) { - // low precision chain is interrupted here: next operation supported precisions are ignored - continue; - } - - const std::vector childPrecisionsOnActivations = paramsManager->getPrecisionsOnActivations(*child); - if (childPrecisionsOnActivations.size() == 0ul) { - continue; - } - - for (size_t index = 0ul; index < availablePrecisions.size();) { - const element::Type availablePrecision = availablePrecisions[index]; - if (!std::any_of( - childPrecisionsOnActivations.begin(), - childPrecisionsOnActivations.end(), - [&](const element::Type precision) { return availablePrecision == precision; })) { - availablePrecisions.erase(availablePrecisions.begin() + index); - } else { - ++index; - } - } - - if (!layerTransformationsManager->isPrecisionPreserved(child)) { - continue; - } - - fillAvailablePrecisions(child, availablePrecisions); - if (availablePrecisions.empty()) { - return; - } - } -} - -std::vector> LayerTransformation::getChildrenRecursivelyExceptPrecisionPreserved( - const std::shared_ptr& op) const noexcept { - std::queue> notHandledChildren; - - for (const auto& output : op->outputs()) { - for (const auto& input : output.get_target_inputs()) { - std::shared_ptr child = input.get_node()->shared_from_this(); - notHandledChildren.emplace(child); - } - } - - std::vector> resultChildren; - - while (!notHandledChildren.empty()) { - const std::shared_ptr operation = notHandledChildren.front(); - notHandledChildren.pop(); - - if (!this->layerTransformationsManager->isPrecisionPreserved(operation)) { - resultChildren.push_back(operation); - continue; - } - - for (const auto& output : operation->outputs()) { - for (const auto& input : 
output.get_target_inputs()) { - std::shared_ptr child = input.get_node()->shared_from_this(); - notHandledChildren.emplace(child); - } - } - } - - return resultChildren; + return DataPrecision( + precisionDetailsAtOutputIntervals.precision, + 0.f, + 0.f, + precisionDetailsAtOutputIntervals.hasZeroPoint); } std::shared_ptr LayerTransformation::moveDequantizationAfter( @@ -450,15 +334,15 @@ void LayerTransformation::updateOutput( TransformationContext &context, std::shared_ptr lastNode, std::shared_ptr originalNode) const { - const size_t outputSize = context.function->get_output_size(); - for (size_t i = 0; i < outputSize; ++i) { - std::shared_ptr result = context.function->get_output_op(i); - std::shared_ptr outputNode = result->get_input_node_shared_ptr(0); - if (outputNode.get() == lastNode.get()) { - const std::string originalName = originalNode->get_friendly_name(); - originalNode->set_friendly_name(originalName + LayerTransformation::originalLayerPostfix); - lastNode->set_friendly_name(originalName); - break; + // TODO: not tested!!! + for (auto output : lastNode->outputs()) { + for (auto input : output.get_target_inputs()) { + if (is_type(input.get_node())) { + const std::string originalName = originalNode->get_friendly_name(); + originalNode->set_friendly_name(originalName + LayerTransformation::originalLayerPostfix); + lastNode->set_friendly_name(originalName); + break; + } } } } @@ -478,7 +362,7 @@ void LayerTransformation::updateOutput( } } -void LayerTransformation::addPattern(ngraph::pass::GraphRewrite& pass, TransformationContext& context, std::shared_ptr patternRoot) const { +void LayerTransformation::addPattern(ngraph::pass::GraphRewrite& pass, TransformationContext& context, std::shared_ptr patternRoot) { ngraph::graph_rewrite_callback internal_callback = [this, &context](ngraph::pattern::Matcher &m) { const bool result = transform(context, m); (void)result; diff --git a/inference-engine/src/low_precision_transformations/src/low_precision.cpp b/inference-engine/src/low_precision_transformations/src/low_precision.cpp new file mode 100644 index 00000000000000..a138b484d7f0d2 --- /dev/null +++ b/inference-engine/src/low_precision_transformations/src/low_precision.cpp @@ -0,0 +1,283 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "low_precision/low_precision.hpp" + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "low_precision/align_quantization_intervals.hpp" +#include "low_precision/fake_quantize_decomposition.hpp" +#include "low_precision/markup_precisions.hpp" +#include "low_precision/markup_can_be_quantized.hpp" +#include "low_precision/markup_avg_pool_precision_preserved.hpp" +#include "low_precision/propagate_precisions.hpp" +#include "low_precision/align_quantization_parameters.hpp" + +#include "transformations/common_optimizations/lin_op_sequence_fusion.hpp" +#include "low_precision/fold_convert.hpp" +#include "low_precision/pull_reshape_through_dequantization.hpp" +#include "low_precision/pull_transpose_through_dequantization.hpp" + +// branch specific transformations +#include "low_precision/concat.hpp" + +#include "low_precision/fake_quantize_decomposition.hpp" + +// general transformations +#include "low_precision/add.hpp" +#include "low_precision/avg_pool.hpp" +#include "low_precision/clamp.hpp" +#include "low_precision/convolution.hpp" +#include "low_precision/convolution_backprop_data.hpp" +#include "low_precision/depth_to_space.hpp" 
+#include "low_precision/fake_quantize.hpp" +#include "low_precision/group_convolution.hpp" +#include "low_precision/interpolate.hpp" +#include "low_precision/mat_mul.hpp" +#include "low_precision/max_pool.hpp" +#include "low_precision/multiply.hpp" +#include "low_precision/mvn.hpp" +#include "low_precision/normalize_l2.hpp" +#include "low_precision/prelu.hpp" +#include "low_precision/reduce_max.hpp" +#include "low_precision/reduce_mean.hpp" +#include "low_precision/reduce_min.hpp" +#include "low_precision/reduce_sum.hpp" +#include "low_precision/reshape.hpp" +#include "low_precision/relu.hpp" +#include "low_precision/squeeze.hpp" +#include "low_precision/subtract.hpp" +#include "low_precision/split.hpp" +#include "low_precision/shuffle_channels.hpp" +#include "low_precision/strided_slice.hpp" +#include "low_precision/transpose.hpp" +#include "low_precision/unsqueeze.hpp" +#include "low_precision/variadic_split.hpp" + +// cleanup transformations +#include "low_precision/convert.hpp" +#include "low_precision/fold_fake_quantize.hpp" +#include "low_precision/fuse_convert.hpp" +#include "low_precision/fuse_fake_quantize.hpp" +#include "low_precision/fuse_subtract_to_fake_quantize.hpp" +#include "low_precision/fuse_multiply_to_fake_quantize.hpp" +#include "low_precision/multiply_to_group_convolution.hpp" +#include "low_precision/subtract_multiply_to_multiply_add.hpp" + +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::LowPrecision, "LowPrecision", 0); + +ngraph::pass::low_precision::LowPrecision::LowPrecision( + const std::vector& precisionRestrictions, + const std::vector& quantizationRestrictions, + const LayerTransformation::Params params) : + precisionRestrictions(precisionRestrictions), + quantizationRestrictions(quantizationRestrictions), + params(params) { +} + +using namespace ngraph::pass::low_precision; + +template +void make_matcher_type_relaxed(ngraph::pass::GraphRewrite* transformation) { + using namespace ngraph; + + auto is_op_type = [](std::shared_ptr n) { + return !!as_type_ptr(n); + }; + + auto p_node = std::make_shared(element::f32, Shape{}, is_op_type); + + ngraph::graph_rewrite_callback callback = [](ngraph::pattern::Matcher& m) { + auto l_node = std::dynamic_pointer_cast(m.get_match_root()); + if (std::dynamic_pointer_cast(l_node)) { + return false; + } + if (!l_node) { + THROW_IE_LPT_EXCEPTION(*l_node) << "unexpected operation type"; + } + + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::LPT_LT, "LowPrecisionTypeRelaxedMatcher"); + + std::vector inputPrecisions; + for (auto& inputs : l_node->inputs()) { + inputPrecisions.push_back(inputs.get_element_type()); + } + + std::vector outputPrecisions; + for (auto& output : l_node->outputs()) { + outputPrecisions.push_back(output.get_element_type()); + } + + auto replacement = std::make_shared>(*l_node, inputPrecisions, outputPrecisions); + + copy_runtime_info(l_node, replacement); + replace_node(l_node, replacement); + return true; + }; + + auto m = std::make_shared(p_node, "TypeRelaxedReplacer"); + NGRAPH_SUPPRESS_DEPRECATED_START + transformation->add_matcher(m, callback, ngraph::pass::PassProperty::CHANGE_DYNAMIC_STATE); + NGRAPH_SUPPRESS_DEPRECATED_END +} + +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::TypeRelaxedReplacer, "TypeRelaxedReplacer", 0); + +ngraph::pass::low_precision::TypeRelaxedReplacer::TypeRelaxedReplacer() { + make_matcher_type_relaxed(this); + make_matcher_type_relaxed(this); + make_matcher_type_relaxed(this); + make_matcher_type_relaxed(this); + make_matcher_type_relaxed(this); + 
make_matcher_type_relaxed(this); + make_matcher_type_relaxed(this); + make_matcher_type_relaxed(this); + make_matcher_type_relaxed(this); + make_matcher_type_relaxed(this); + make_matcher_type_relaxed(this); + make_matcher_type_relaxed(this); + make_matcher_type_relaxed(this); + make_matcher_type_relaxed(this); + make_matcher_type_relaxed(this); + make_matcher_type_relaxed(this); + make_matcher_type_relaxed(this); + make_matcher_type_relaxed(this); +} + +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::MarkupOptimizations, "MarkupOptimizations", 0); + +MarkupOptimizations::MarkupOptimizations( + const std::vector& precisionRestrictions, + const std::vector& quantizationRestrictions) : + precisionRestrictions(precisionRestrictions), + quantizationRestrictions(quantizationRestrictions) {} + +bool ngraph::pass::low_precision::MarkupOptimizations::run_on_function(std::shared_ptr f) { + ngraph::pass::Manager markup(get_pass_config()); + markup.set_per_pass_validation(false); + markup.register_pass(); + if (!precisionRestrictions.empty()) { + markup.register_pass(precisionRestrictions); + } + if (!quantizationRestrictions.empty()) { + markup.register_pass(quantizationRestrictions); + } + if (ngraph::op::util::has_op_with_type(f)) { + markup.register_pass(); + } + markup.register_pass(); + if (ngraph::op::util::has_op_with_type(f)) { + markup.register_pass(); + markup.register_pass(); + } + markup.run_passes(f); + return false; +} + +bool ngraph::pass::low_precision::LowPrecision::run_on_function(std::shared_ptr f) { + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::LPT_LT, "LowPrecision"); + + auto passConfig = get_pass_config(); + ngraph::pass::Manager manager(passConfig); + + auto prerequisites = manager.register_pass(); + const std::vector supportedTypes = {ngraph::element::i8, ngraph::element::u8}; + prerequisites->add_matcher(supportedTypes); + prerequisites->add_matcher(supportedTypes); + prerequisites->add_matcher(); + + manager.register_pass(); + + manager.register_pass(precisionRestrictions, quantizationRestrictions); + + std::shared_ptr common = manager.register_pass(); + common->add_matcher(params); + common->add_matcher(params); + common->add_matcher(params); + common->add_matcher(params); + common->add_matcher(params); + common->add_matcher(params); + common->add_matcher(params); + common->add_matcher(params); + common->add_matcher(params); + common->add_matcher(params); + common->add_matcher(params); + common->add_matcher(params); + common->add_matcher(params); + common->add_matcher(params); + common->add_matcher(params); + common->add_matcher(params); + common->add_matcher(params); + common->add_matcher(params); + common->add_matcher(params); + common->add_matcher(params); + common->add_matcher(params); + common->add_matcher(params); + common->add_matcher(params); + common->add_matcher(params); + common->add_matcher(params); + common->add_matcher(params); + common->add_matcher(params); + common->add_matcher(params); + common->add_matcher(params); + common->add_matcher(params); + + std::shared_ptr cleanup = manager.register_pass(); + cleanup->add_matcher(params); + cleanup->add_matcher(params); + cleanup->add_matcher(params); + cleanup->add_matcher(params); + // WA: precision restrictions for groupConv must be propagated to MultiplyToGroupConvolution transformation + cleanup->add_matcher( + params, + OperationPrecisionRestriction::getPrecisionsByOperationType(precisionRestrictions)); + manager.register_pass(params); + manager.register_pass(params); + manager.register_pass(); + + 
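Taken together, the markup, common, and cleanup groups registered above form the whole pipeline that run_passes executes below. A hedged sketch of how a plugin could invoke it, with empty restriction lists for brevity; the default-constructed Params is an assumption about LayerTransformation::Params, not shown in this patch:

#include <memory>
#include <vector>
#include <ngraph/pass/manager.hpp>
#include "low_precision/low_precision.hpp"

void runLowPrecision(const std::shared_ptr<ngraph::Function>& f) {
    using namespace ngraph::pass::low_precision;
    // No per-port precision limits and no per-tensor quantization limits.
    std::vector<OperationPrecisionRestriction> precisionRestrictions;
    std::vector<OperationPerTensorQuantizationRestriction> quantizationRestrictions;

    ngraph::pass::Manager manager;
    manager.register_pass<LowPrecision>(precisionRestrictions, quantizationRestrictions, LayerTransformation::Params());
    manager.run_passes(f);
}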
manager.run_passes(f); + return false; +} + +bool ngraph::pass::low_precision::LowPrecision::isFunctionQuantized(const std::shared_ptr& function) { + std::set> handledNodes; + std::deque> nodes; + for (auto result : function->get_results()) { + nodes.push_front(result); + } + + while (!nodes.empty()) { + auto node = nodes.front(); + nodes.pop_front(); + + for (size_t i = 0; i < node->inputs().size(); ++i) { + auto parent = node->get_input_node_shared_ptr(i); + if (handledNodes.find(parent) != handledNodes.end()) { + continue; + } + + const std::shared_ptr fakeQuantize = as_type_ptr(parent); + if ((fakeQuantize != nullptr) && + QuantizationDetails::outputLayoutIsSupported(fakeQuantize) && + QuantizationDetails::isSupportedLevel(fakeQuantize->get_levels())) { + return true; + } + + nodes.push_front(parent); + handledNodes.insert(parent); + } + } + return false; +} diff --git a/inference-engine/src/low_precision_transformations/src/markup_avg_pool_precision_preserved.cpp b/inference-engine/src/low_precision_transformations/src/markup_avg_pool_precision_preserved.cpp new file mode 100644 index 00000000000000..2dc256920c74b8 --- /dev/null +++ b/inference-engine/src/low_precision_transformations/src/markup_avg_pool_precision_preserved.cpp @@ -0,0 +1,26 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "low_precision/markup_avg_pool_precision_preserved.hpp" +#include +#include +#include "low_precision/create_precisions_dependent_attribute.hpp" +#include "low_precision/rt_info/avg_pool_precision_preserved_attribute.hpp" +#include "low_precision/propagate_through_precision_preserved.hpp" +#include "low_precision/update_shared_precision_preserved.hpp" + +using namespace ngraph; + +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::MarkupAvgPoolPrecisionPreserved, "MarkupAvgPoolPrecisionPreserved", 0); + +bool ngraph::pass::low_precision::MarkupAvgPoolPrecisionPreserved::run_on_function(std::shared_ptr f) { + ngraph::pass::Manager manager; + manager.set_per_pass_validation(false); + std::shared_ptr markupAvgPoolPrecision = manager.register_pass(); + markupAvgPoolPrecision->add_matcher>(); + markupAvgPoolPrecision->add_matcher>(); + markupAvgPoolPrecision->add_matcher>(); + manager.run_passes(f); + return false; +} diff --git a/inference-engine/src/low_precision_transformations/src/markup_can_be_quantized.cpp b/inference-engine/src/low_precision_transformations/src/markup_can_be_quantized.cpp new file mode 100644 index 00000000000000..3117efc2debd14 --- /dev/null +++ b/inference-engine/src/low_precision_transformations/src/markup_can_be_quantized.cpp @@ -0,0 +1,59 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "low_precision/markup_can_be_quantized.hpp" + +#include + +#include +#include "low_precision/convolution.hpp" +#include "low_precision/convolution_backprop_data.hpp" +#include "low_precision/group_convolution.hpp" +#include "low_precision/network_helper.hpp" +#include "low_precision/rt_info/precisions_attribute.hpp" + +using namespace ngraph; + +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::MarkupCanBeQuantized, "MarkupCanBeQuantized", 0); + +bool ngraph::pass::low_precision::MarkupCanBeQuantized::run_on_function(std::shared_ptr f) { + auto setEmptyPrecisions = [](const std::shared_ptr& node) { + for (auto& input : node->inputs()) { + auto& rt = input.get_rt_info(); + + auto attribute = ngraph::pass::low_precision::make_shared_attribute(std::vector()); + auto attributeWrapper = 
std::make_shared>>(attribute); + + rt.emplace( + ngraph::VariantWrapper>::type_info.name, + attributeWrapper); + } + }; + + for (const std::shared_ptr& node : f->get_ordered_ops()) { + if (node->get_input_size() == 0 || transformation_callback(node)) { + continue; + } + + if (const auto convolution = std::dynamic_pointer_cast(node)) { + if (!ConvolutionTransformation::isQuantizedStatic(convolution)) { + setEmptyPrecisions(convolution); + } + continue; + } + if (const auto convolutionBackpropData = std::dynamic_pointer_cast(node)) { + if (!ConvolutionBackpropDataTransformation::isQuantizedStatic(convolutionBackpropData)) { + setEmptyPrecisions(convolutionBackpropData); + } + continue; + } + if (const auto groupConvolution = std::dynamic_pointer_cast(node)) { + if (!GroupConvolutionTransformation::isQuantizedStatic(groupConvolution)) { + setEmptyPrecisions(groupConvolution); + } + continue; + } + } + return true; +} diff --git a/inference-engine/src/low_precision_transformations/src/markup_per_tensor_quantization.cpp b/inference-engine/src/low_precision_transformations/src/markup_per_tensor_quantization.cpp new file mode 100644 index 00000000000000..4cd37c94658a53 --- /dev/null +++ b/inference-engine/src/low_precision_transformations/src/markup_per_tensor_quantization.cpp @@ -0,0 +1,85 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "low_precision/markup_per_tensor_quantization.hpp" + +#include +#include +#include +#include +#include "low_precision/rt_info/per_tensor_quantization_attribute.hpp" + +using namespace ngraph; + +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::MarkupPerTensorQuantization, "MarkupPerTensorQuantization", 0); + +ngraph::pass::low_precision::MarkupPerTensorQuantization::MarkupPerTensorQuantization( + const std::vector& restrictions) { + for (const OperationPerTensorQuantizationRestriction& restriction : restrictions) { + const auto it = restrictionsByOperation.find(restriction.operationType.name); + if (it == restrictionsByOperation.end()) { + PerTensorQuantization r(restriction.specifyVersion); + r.portsByVersion.emplace(restriction.operationType.version, restriction.restrictedPorts); + restrictionsByOperation.emplace(restriction.operationType.name, r); + } else { + it->second.add(restriction.operationType.version, restriction.restrictedPorts); + } + } +} + +bool ngraph::pass::low_precision::MarkupPerTensorQuantization::run_on_function(std::shared_ptr f) { + auto setRestriction = [](const std::shared_ptr& node, const std::vector& restrictedPorts) { + auto createAttribute = [](Input& input){ + auto &rt = input.get_rt_info(); + rt.emplace( + ngraph::VariantWrapper::type_info.name, + std::make_shared<::ngraph::VariantWrapper>(PerTensorQuantizationAttribute())); + }; + + if (restrictedPorts.empty()) { + // markup all ports + for (size_t item = 0ul; item < node->get_input_size(); item++) { + Input input = node->input(item); + createAttribute(input); + } + } else { + // markup specific ports + for (const size_t item : restrictedPorts) { + Input input = node->input(item); + createAttribute(input); + } + } + }; + + for (const std::shared_ptr& node : f->get_ordered_ops()) { + if (node->get_input_size() == 0) { + continue; + } + + const auto typeIt = restrictionsByOperation.find(node->get_type_info().name); + if (typeIt == restrictionsByOperation.end()) { + continue; + } + + const auto& restriction = typeIt->second; + if (restriction.portsByVersion.empty()) { + continue; + } + + if (restriction.versionIsRequired) { + 
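            // versionIsRequired means the restriction was registered for one
            // specific opset version (specifyVersion == true in the constructor
            // above), so ports are marked only when the node's
            // get_type_info().version matches. In the else branch below, a
            // single version-agnostic port list is expected and applied to the
            // node regardless of its version.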
const auto it2 = restriction.portsByVersion.find(node->get_type_info().version); + if (it2 == restriction.portsByVersion.end()) { + continue; + } + + const std::vector& restrictedPorts = it2->second; + setRestriction(node, restrictedPorts); + } else { + assert(restriction.portsByVersion.size() == 1ul); + const std::vector& restrictedPorts = restriction.portsByVersion.begin()->second; + setRestriction(node, restrictedPorts); + } + } + return true; +} diff --git a/inference-engine/src/low_precision_transformations/src/markup_precisions.cpp b/inference-engine/src/low_precision_transformations/src/markup_precisions.cpp new file mode 100644 index 00000000000000..17747179345c1f --- /dev/null +++ b/inference-engine/src/low_precision_transformations/src/markup_precisions.cpp @@ -0,0 +1,217 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "low_precision/markup_precisions.hpp" + +#include +#include +#include +#include + +#include +#include +#include +#include +#include "low_precision/network_helper.hpp" +#include "low_precision/rt_info/precisions_attribute.hpp" +#include "low_precision/rt_info/precision_preserved_attribute.hpp" + +using namespace ngraph; + +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::MarkupPrecisions, "MarkupPrecisions", 0); + +ngraph::pass::low_precision::MarkupPrecisions::MarkupPrecisions(const std::vector& restrictions) { + for (const auto& restriction : restrictions) { + const auto it = restrictionsByOperation.find(restriction.operationType.name); + if (it == restrictionsByOperation.end()) { + Restriction r(restriction.specifyVersion); + r.precisionsByVersion.emplace(restriction.operationType.version, restriction.precisionsByPort); + restrictionsByOperation.emplace(restriction.operationType.name, r); + } else { + it->second.add(restriction.operationType.version, restriction.precisionsByPort); + } + } +} + +namespace { +void setRestriction( + const std::shared_ptr& node, + const std::vector>>& precisionsByPort) { + if (precisionsByPort.empty()) { + // if available precisions for any port is empty then mark all input ports + for (auto& input : node->inputs()) { + auto& rt = input.get_rt_info(); + + auto attribute = ngraph::pass::low_precision::make_shared_attribute(std::vector()); + auto attributeWrapper = std::make_shared>>(attribute); + + rt.emplace( + ngraph::VariantWrapper>::type_info.name, + attributeWrapper); + } + } else { + for (const std::pair>& item : precisionsByPort) { + Input input = node->input(item.first); + + auto precisionsAttribute = ngraph::pass::low_precision::getAttribute>(input); + if ((precisionsAttribute != nullptr) && + (precisionsAttribute->get()->sharedValue != nullptr) && + (precisionsAttribute->get()->sharedValue->precisions.empty())) { + return; + } + + auto attribute = ngraph::pass::low_precision::make_shared_attribute(item.second); + auto attributeWrapper = std::make_shared>>(attribute); + + auto& rt = input.get_rt_info(); + rt[ngraph::VariantWrapper>::type_info.name] = attributeWrapper; + } + } +} +} // namespace + +bool ngraph::pass::low_precision::MarkupPrecisions::run_on_function(std::shared_ptr f) { + for (const std::shared_ptr& node : f->get_ordered_ops()) { + if (node->get_input_size() == 0) { + continue; + } + + if (transformation_callback(node)) { + continue; + } + + // TODO: don't need to set restrictions for not supported operations + // if don't set restrictions for not supported operations then accuracy drop appears, issue #59197 + const bool supported = is_type(node) || 
isSupported(node); + if (!supported || !LayerTransformation::canBeTransformedStatic(node)) { + setRestriction(node, std::vector>> { {0ul, {}}}); + continue; + } + + const bool precisionPreserved = isPrecisionPreserved(node); + if (precisionPreserved) { + auto& rt = node->get_rt_info(); + rt.emplace( + ngraph::VariantWrapper::type_info.name, + std::make_shared<::ngraph::VariantWrapper>( + make_shared_attribute(precisionPreserved))); + } + + const auto& typeInfo = node->get_type_info(); + auto it = restrictionsByOperation.find(typeInfo.name); + if (it != restrictionsByOperation.end()) { + const Restriction& r = it->second; + if (r.versionIsRequired) { + const auto it2 = r.precisionsByVersion.find(typeInfo.version); + if (it2 == r.precisionsByVersion.end()) { + continue; + } + + const std::vector>>& precisionsByPort = it2->second; + setRestriction(node, precisionsByPort); + } else { + assert(r.precisionsByVersion.size() == 1ul); + + const std::vector>>& precisionsByPort = r.precisionsByVersion.begin()->second; + setRestriction(node, precisionsByPort); + } + } + } + return true; +} + +template +std::string name() { + return Operation::get_type_info_static().name; +} + +bool ngraph::pass::low_precision::MarkupPrecisions::isPrecisionPreserved(const std::shared_ptr& node) { + if (isDisabled(node)) { + return false; + } + + // TODO: think how to handle conditions <= not mandatory for PoC + // TODO: operation set version is not affected <= not mandatory for PoC + static std::unordered_set precisionPreservedOps = { + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + // TODO: there are conditions + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + { name() } + }; + + const bool precisionPreserved = precisionPreservedOps.find(node->get_type_name()) != precisionPreservedOps.end(); + if (precisionPreserved) { + return precisionPreserved; + } + + if (is_type(node)) { + std::shared_ptr interpolate1 = as_type_ptr(node); + if (interpolate1) { + const auto attrs = interpolate1->get_attrs(); + return attrs.mode == "nearest"; + } + + std::shared_ptr interpolate4 = as_type_ptr(node); + if (interpolate4) { + const auto attrs = interpolate4->get_attrs(); + return attrs.mode == op::v4::Interpolate::InterpolateMode::nearest; + } + } + + return false; +} + +bool ngraph::pass::low_precision::MarkupPrecisions::isSupported(const std::shared_ptr& node) { + static std::unordered_set supportedOps = { + { name() }, + { name() }, + { name() }, + { name() }, + // ? + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + // TODO: there are conditions + { name() }, + { name() }, + { name() }, + { name() }, + { name() }, + // ? 
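        // name<Op>() (defined above) resolves to Operation::get_type_info_static().name,
        // so this set is keyed by type name alone and deliberately ignores the opset
        // version. Entries flagged "TODO: there are conditions" are only conditionally
        // transformable; attribute-dependent checks live elsewhere - e.g.
        // isPrecisionPreserved() above accepts Interpolate only in "nearest" mode.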
+ { name() }, + { name() }, + { name() }, + { name() } + }; + + return supportedOps.find(node->get_type_name()) != supportedOps.end(); +} diff --git a/inference-engine/src/low_precision_transformations/src/mat_mul.cpp b/inference-engine/src/low_precision_transformations/src/mat_mul.cpp index 1d9745da53f9dc..693d0e6490e2e9 100644 --- a/inference-engine/src/low_precision_transformations/src/mat_mul.cpp +++ b/inference-engine/src/low_precision_transformations/src/mat_mul.cpp @@ -9,6 +9,9 @@ #include #include +#include +#include + #include "low_precision/network_helper.hpp" #include "low_precision/common/dequantization_op.hpp" @@ -16,20 +19,33 @@ using namespace ngraph; using namespace ngraph::pass; using namespace ngraph::pass::low_precision; -bool MatMulTransformation::transform(TransformationContext &context, ngraph::pattern::Matcher &m) const { +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::MatMulTransformation, "MatMulTransformation", 0); + +MatMulTransformation::MatMulTransformation(const Params& params) : LayerTransformation(params) { + auto mul1 = pattern::wrap_type(); + auto mul2 = pattern::wrap_type(); + auto fq2 = pattern::wrap_type(); + auto matcher = pattern::wrap_type({ mul1, std::make_shared(OutputVector{ mul2, fq2 })}); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; + + auto m = std::make_shared(matcher, "MatMulTransformation"); + this->register_matcher(m, callback); +} + +bool MatMulTransformation::transform(TransformationContext &context, ngraph::pattern::Matcher &m) { std::shared_ptr matMul = as_type_ptr(m.get_match_root()); if ((matMul == nullptr) || !canBeTransformed(context, matMul)) { return false; } matMul = as_type_ptr(NetworkHelper::separateInStandaloneBranch(matMul)); - if (!support3DTensorOnActivations) { - const auto inputRank = matMul->get_input_partial_shape(0).rank(); - if (inputRank.is_dynamic() || inputRank.get_length() == 3) { - return false; - } - } - const auto dequantization1 = NetworkHelper::getDequantization(matMul, 0); auto dequantization2 = NetworkHelper::getDequantization(matMul, 1); @@ -38,7 +54,12 @@ bool MatMulTransformation::transform(TransformationContext &context, ngraph::pat as_type_ptr(dequantization2.data.get_node_shared_ptr()); if (fakeQuantize != nullptr) { const QuantizationDetails quantizationDetails = QuantizationDetails::getDetails(fakeQuantize); - const DataPrecision dataPrecision = getDataPrecision(fakeQuantize, quantizationDetails, true); + + const auto precisionsAttribute = getAttributeFromOutput(fakeQuantize); + const auto precisions = precisionsAttribute == nullptr ? 
+ PrecisionsAttribute::defaultPrecisions : + precisionsAttribute->get()->sharedValue->precisions; + const DataPrecision dataPrecision = getDataPrecision(fakeQuantize, quantizationDetails, precisions); auto tuple = NetworkHelper::decomposeFakeQuantize( fakeQuantize, @@ -147,27 +168,20 @@ bool MatMulTransformation::transform(TransformationContext &context, ngraph::pat replace_node(matMul, newMultiply); copy_runtime_info({ newMultiply, matMul }, newMultiply); - updateOutput(context, newMultiply, matMul); + updateOutput(context, newMultiply, newMatMul); return true; } -void MatMulTransformation::registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const { - addPattern( - pass, - context, - make_op_pattern({ make_op_label(), make_op_label() })); - - addPattern( - pass, - context, - make_op_pattern({ make_op_label(), make_op_label() })); -} - bool MatMulTransformation::isPrecisionPreserved(std::shared_ptr layer) const noexcept { return false; } +bool MatMulTransformation::is3DTensorOnActivations(const std::shared_ptr& node) { + const auto inputDataRank = node->get_input_partial_shape(0).rank(); + return inputDataRank.is_dynamic() || inputDataRank.get_length() == 3; +} + bool MatMulTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr layer) const { if (!LayerTransformation::canBeTransformedSpatialDimension(context, layer)) { return false; @@ -204,6 +218,8 @@ bool MatMulTransformation::canBeTransformed(const TransformationContext& context if (!NetworkHelper::checkZeroPoint(dequantization1.subtract)) { return false; } + } else { + return false; } const auto dequantization2 = NetworkHelper::getDequantization(layer, 1); @@ -240,7 +256,13 @@ bool MatMulTransformation::canBeTransformed(const TransformationContext& context } const QuantizationDetails quantizationDetails = QuantizationDetails::getDetails(fakeQuantize); - const DataPrecision dataPrecision = getDataPrecision(fakeQuantize, quantizationDetails, true); + + const auto precisionsAttribute = getAttribute(matMul->input(1)); + const auto precisions = precisionsAttribute == nullptr ? 
+ PrecisionsAttribute::defaultPrecisions : + precisionsAttribute->get()->sharedValue->precisions; + + const DataPrecision dataPrecision = getDataPrecision(fakeQuantize, quantizationDetails, precisions); if (dataPrecision.hasZeroPoint) { return false; } @@ -259,6 +281,10 @@ bool MatMulTransformation::canBeTransformed(const TransformationContext& context } } + if (!fakeQuantize && dequantization2.empty()) { + return false; + } + if ((!NetworkHelper::isConstantPath(layer->get_input_node_shared_ptr(1))) && (dequantization1.subtract)) { return false; } diff --git a/inference-engine/src/low_precision_transformations/src/max_pool.cpp b/inference-engine/src/low_precision_transformations/src/max_pool.cpp index 4f867cc4bdda49..68a73cac59e522 100644 --- a/inference-engine/src/low_precision_transformations/src/max_pool.cpp +++ b/inference-engine/src/low_precision_transformations/src/max_pool.cpp @@ -8,20 +8,29 @@ #include #include +#include + #include "low_precision/network_helper.hpp" namespace ngraph { namespace pass { namespace low_precision { +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::MaxPoolTransformation, "MaxPoolTransformation", 0); + MaxPoolTransformation::MaxPoolTransformation(const Params& params) : LayerTransformation(params) { -} + auto matcher = pattern::wrap_type({ pattern::wrap_type() }); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; -void MaxPoolTransformation::registerMatcherIn(GraphRewrite &pass, TransformationContext &context) const { - addPattern( - pass, - context, - make_op_pattern({ make_op_label() })); + auto m = std::make_shared(matcher, "MaxPoolTransformation"); + this->register_matcher(m, callback); } bool MaxPoolTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr op) const { @@ -42,7 +51,7 @@ bool MaxPoolTransformation::canBeTransformed(const TransformationContext& contex return true; } -bool MaxPoolTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const { +bool MaxPoolTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) { if (!canBeTransformed(context, m.get_match_root())) { return false; } diff --git a/inference-engine/src/low_precision_transformations/src/multiply.cpp b/inference-engine/src/low_precision_transformations/src/multiply.cpp index bf354bfc5f0613..d95fe2812c3f1e 100644 --- a/inference-engine/src/low_precision_transformations/src/multiply.cpp +++ b/inference-engine/src/low_precision_transformations/src/multiply.cpp @@ -12,6 +12,8 @@ #include #include +#include + #include "low_precision/common/ie_lpt_exception.hpp" #include "low_precision/common/dequantization_op.hpp" #include "low_precision/network_helper.hpp" @@ -20,11 +22,24 @@ namespace ngraph { namespace pass { namespace low_precision { -void MultiplyTransformation::registerMatcherIn(GraphRewrite &pass, TransformationContext &context) const { - addSingleNodePattern(pass, context); +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::MultiplyTransformation, "MultiplyTransformation", 0); + +MultiplyTransformation::MultiplyTransformation(const Params& params) : EltwiseBaseTransformation(params) { + auto matcher = pattern::wrap_type(); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; + + auto 
m = std::make_shared(matcher, "MultiplyTransformation"); + this->register_matcher(m, callback); } -bool MultiplyTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const { +bool MultiplyTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) { auto multiply = m.get_match_root(); if (!LayerTransformation::canBeTransformed(context, multiply)) { return false; diff --git a/inference-engine/src/low_precision_transformations/src/multiply_to_group_convolution.cpp b/inference-engine/src/low_precision_transformations/src/multiply_to_group_convolution.cpp index 9d477ed11c4b05..9b4a6147b61c07 100644 --- a/inference-engine/src/low_precision_transformations/src/multiply_to_group_convolution.cpp +++ b/inference-engine/src/low_precision_transformations/src/multiply_to_group_convolution.cpp @@ -5,17 +5,33 @@ #include "low_precision/multiply_to_group_convolution.hpp" #include #include +#include #include "low_precision/network_helper.hpp" namespace ngraph { namespace pass { namespace low_precision { -void MultiplyToGroupConvolutionTransformation::registerMatcherIn(GraphRewrite &pass, TransformationContext &context) const { - addSingleNodePattern(pass, context); +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::MultiplyToGroupConvolutionTransformation, "MultiplyToGroupConvolutionTransformation", 0); + +MultiplyToGroupConvolutionTransformation::MultiplyToGroupConvolutionTransformation( + const Params& params, + const OperationPrecisionRestriction::PrecisionsByPort& restrictions) : LayerTransformation(params), restrictions(restrictions), groupSize(1ul) { + auto matcher = pattern::wrap_type(); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; + + auto m = std::make_shared(matcher, "MultiplyToGroupConvolutionTransformation"); + this->register_matcher(m, callback); } -bool MultiplyToGroupConvolutionTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const { +bool MultiplyToGroupConvolutionTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) { const auto multiply = m.get_match_root(); if (!canBeTransformed(context, multiply)) { return false; @@ -31,11 +47,34 @@ bool MultiplyToGroupConvolutionTransformation::transform(TransformationContext& } auto dequantization = NetworkHelper::getDequantization(multiply, inputIndex); + if (dequantization.data.get_node() == nullptr) { + return false; + } if (dequantization.subtractConvert != nullptr) { dequantization = NetworkHelper::foldDequantization(multiply, inputIndex); } - const element::Type weightsPrecision = updatePrecisions ? precisionsOnWeights[0] : dequantization.data.get_element_type(); + element::Type weightsPrecision = element::undefined; + if (updatePrecisions) { + // try to find restrictions on weights for GroupConvolution + if (restrictions.size() > 1ul) { + const auto& availablePreisions = restrictions[1].second; + if (!availablePreisions.empty()) { + weightsPrecision = availablePreisions[0]; + } + } + + // if restrictions are absent precisions attribute is used + if (weightsPrecision == element::undefined) { + const auto precisionsAttribute = getAttribute(multiply->input(inputIndex == 0ul ? 1ul : 0ul)); + const auto precisions = precisionsAttribute == nullptr ? 
+ PrecisionsAttribute::defaultPrecisions : + precisionsAttribute->get()->sharedValue->precisions; + weightsPrecision = precisions[0]; + } + } else { + weightsPrecision = dequantization.data.get_element_type(); + } const size_t inputChannelsCount = input->get_output_partial_shape(0)[1].get_length(); const size_t outputChannelsCount = multiply->get_output_partial_shape(0)[1].get_length(); @@ -140,21 +179,21 @@ bool MultiplyToGroupConvolutionTransformation::canBeTransformed(const Transforma return false; } - const auto dequantization = NetworkHelper::getDequantization(operation, inputIndex); - - if (dequantization.empty()) { - return false; - } - for (size_t i = 2; i < constShape.size(); ++i) { if (constShape[i] != 1) { return false; } } - if (updatePrecisions) { + if (updatePrecisions && restrictions.size() > 0) { + const auto& availablePreisions = restrictions[0].second; + if (availablePreisions.empty()) { + return false; + } + + const auto dequantization = NetworkHelper::getDequantization(operation, inputIndex); const element::Type parentPrecision = dequantization.data.get_element_type(); - if (std::find(precisionsOnActivations.begin(), precisionsOnActivations.end(), parentPrecision) == precisionsOnActivations.end()) { + if (std::find(availablePreisions.begin(), availablePreisions.end(), parentPrecision) == availablePreisions.end()) { return false; } } @@ -162,7 +201,11 @@ bool MultiplyToGroupConvolutionTransformation::canBeTransformed(const Transforma return true; } -bool MultiplyToGroupConvolutionTransformation::isQuantized(std::shared_ptr layer) const noexcept { +bool MultiplyToGroupConvolutionTransformation::isQuantized(const std::shared_ptr& layer) const noexcept { + return MultiplyToGroupConvolutionTransformation::canBeTransformedToGroupConvolution(layer); +} + +bool MultiplyToGroupConvolutionTransformation::canBeTransformedToGroupConvolution(const std::shared_ptr& layer) noexcept { const auto parent0 = layer->get_input_node_shared_ptr(0); const auto parent1 = layer->get_input_node_shared_ptr(1); @@ -179,6 +222,35 @@ bool MultiplyToGroupConvolutionTransformation::isQuantized(std::shared_ptr return (pShape.rank().get_length() == 4ul) || (pShape.rank().get_length() == 5ul); } +bool MultiplyToGroupConvolutionTransformation::isDynamicOrScalar(const std::shared_ptr& node) { + auto getConstantIndex = [](const std::shared_ptr& node) -> int { + if (is_type(node->get_input_node_shared_ptr(1))) { + return 1; + } + if (is_type(node->get_input_node_shared_ptr(0))) { + return 0; + } + return -1; + }; + + const int constantIndex = getConstantIndex(node); + if (constantIndex == -1) { + return false; + } + + const Input constantInput = node->input(constantIndex); + const auto shape = constantInput.get_partial_shape(); + if (shape.is_dynamic() || shape.rank().is_dynamic()) { + return true; + } + + if (std::all_of(shape.begin(), shape.end(), [](const Dimension& dimension) { return dimension == 1ul; })) { + return true; + } + + return false; +} + void MultiplyToGroupConvolutionTransformation::setGroupSize(const size_t groupSize) { this->groupSize = groupSize; } diff --git a/inference-engine/src/low_precision_transformations/src/mvn.cpp b/inference-engine/src/low_precision_transformations/src/mvn.cpp index dc6df6d5b0fa4e..7883235e42de44 100644 --- a/inference-engine/src/low_precision_transformations/src/mvn.cpp +++ b/inference-engine/src/low_precision_transformations/src/mvn.cpp @@ -10,6 +10,9 @@ #include #include +#include +#include + #include "ngraph/type/element_type.hpp" #include 
"ngraph/type/element_type_traits.hpp" #include "low_precision/network_helper.hpp" @@ -21,6 +24,8 @@ using namespace ngraph; using namespace ngraph::pass; using namespace ngraph::pass::low_precision; +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::MVNTransformation, "MVNTransformation", 0); + namespace mvn { template @@ -38,6 +43,24 @@ std::shared_ptr createNewScalesConst(const ngraph::op::Con } // namespace mvn +MVNTransformation::MVNTransformation(const Params& params) : LayerTransformation(params) { + auto matcher = std::make_shared(OutputVector{ + pattern::wrap_type({ pattern::wrap_type() }), + pattern::wrap_type({ pattern::wrap_type(), pattern::wrap_type() }) + }); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; + + auto m = std::make_shared(matcher, "MVNTransformation"); + this->register_matcher(m, callback); +} + bool MVNTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr operation) const { if (!LayerTransformation::canBeTransformed(context, operation)) { return false; @@ -86,19 +109,7 @@ bool MVNTransformation::canBeTransformed(const TransformationContext& context, s return perTensor && isScalarScales; } -void MVNTransformation::registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const { - addPattern( - pass, - context, - make_op_pattern({ make_op_label() })); - addPattern( - pass, - context, - make_op_pattern({ make_op_label(), - make_op_label() })); -} - -bool MVNTransformation::transform(TransformationContext &context, ngraph::pattern::Matcher &m) const { +bool MVNTransformation::transform(TransformationContext &context, ngraph::pattern::Matcher &m) { std::shared_ptr operation = m.get_match_root(); if (!canBeTransformed(context, operation)) { return false; diff --git a/inference-engine/src/low_precision_transformations/src/network_helper.cpp b/inference-engine/src/low_precision_transformations/src/network_helper.cpp index 6b26398878ca4f..3f49e8b327cc04 100644 --- a/inference-engine/src/low_precision_transformations/src/network_helper.cpp +++ b/inference-engine/src/low_precision_transformations/src/network_helper.cpp @@ -20,6 +20,9 @@ #include "low_precision/common/ie_lpt_exception.hpp" #include "low_precision/common/dequantization_op.hpp" #include "low_precision/layer_transformation.hpp" +#include "low_precision/rt_info/precision_preserved_attribute.hpp" +#include "low_precision/rt_info/intervals_alignment_attribute.hpp" +#include "low_precision/rt_info/quantization_alignment_attribute.hpp" namespace ngraph { namespace pass { @@ -286,26 +289,65 @@ std::shared_ptr NetworkHelper::swapMultiplyAndAdd(std::shared_ptr{ multiply->get_output_element_type(0) }, ngraph::op::TemporaryReplaceOutputType(newAdd, element::f32).get(), ngraph::op::TemporaryReplaceOutputType(a, element::f32).get()); - copyInfo(multiply, newMultiply); + copyInfo({ multiply, newMultiply }, newMultiply); replace_node(addAfterMultiply, newMultiply); return newMultiply; } -void NetworkHelper::copyInfo(const std::shared_ptr& source, const std::shared_ptr& target) { - // TODO: merge_runtime_info with correctly defined DEQUANTIZATION - const auto& sourceAttributes = source->get_rt_info(); - auto& targetAttrubutes = target->get_rt_info(); - for (auto attribute : sourceAttributes) { - targetAttrubutes[attribute.first] = attribute.second; - } +void NetworkHelper::copyInfo( + const std::vector>& sources, + const 
std::vector>& targets) { + ngraph::copy_runtime_info(sources, targets); + + for (const auto& target : targets) { + const std::string friendlyName = sources[0]->get_friendly_name(); + if (!friendlyName.empty()) { + target->set_friendly_name(friendlyName); + } + + { + // TODO: has to be implemented in ngraph::copy_runtime_info + + for (auto& source : sources) { + if (target->get_type_info() != source->get_type_info()) { + continue; + } - const std::string friendlyName = source->get_friendly_name(); - if (!friendlyName.empty()) { - target->set_friendly_name(friendlyName); + assert(source->get_input_size() == target->get_input_size()); + for (size_t i = 0; i < target->get_input_size(); ++i) { + auto sourceInput = source->input(i); + const auto& sourceRt = sourceInput.get_rt_info(); + auto targetInput = target->input(i); + auto& targetRt = targetInput.get_rt_info(); + for (const auto& it : sourceRt) { + targetRt[it.first] = it.second; + } + } + + assert(source->get_output_size() == target->get_output_size()); + for (size_t i = 0; i < target->get_output_size(); ++i) { + auto sourceOutput = source->output(i); + const auto& sourceRt = sourceOutput.get_rt_info(); + auto targetOutput = target->output(i); + auto& targetRt = targetOutput.get_rt_info(); + for (const auto& it : sourceRt) { + targetRt[it.first] = it.second; + } + } + } + } } } +void NetworkHelper::copyInfo(const std::vector>& sources, const std::shared_ptr& target) { + copyInfo(sources, std::vector>{ target }); +} + +void NetworkHelper::copyInfo(const std::shared_ptr& source, const std::shared_ptr& target) { + copyInfo(std::vector>{ source }, std::vector>{ target }); +} + void NetworkHelper::cleanRunTimeInfo(const std::shared_ptr& layer) { auto& rt_info = layer->get_rt_info(); auto attributeIter = rt_info.find("DEQUANTIZATION"); @@ -315,7 +357,21 @@ void NetworkHelper::cleanRunTimeInfo(const std::shared_ptr& layer) { } bool NetworkHelper::isScalarLike(std::shared_ptr constant) { - return constant->get_all_data_elements_bitwise_identical(); + // ticket #48857 + // return constant->get_all_data_elements_bitwise_identical(); + + const auto shape = constant->output(0).get_shape(); + if (shape_size(shape) == 1ul) { + return true; + } + + + const auto values = constant->cast_vector(); + if (values.empty()) { + return true; + } + + return !std::any_of(values.begin(), values.end(), [&](float value) { return values[0] != value; }); } bool NetworkHelper::isZero(std::shared_ptr constant) { @@ -524,8 +580,10 @@ std::shared_ptr NetworkHelper::separateInStandaloneBranch(std::sha if (dequantization.isShared()) { Output parent = dequantization.data; if (dequantization.convert != nullptr) { - parent = dequantization.convert->clone_with_new_inputs({ parent }); - parent.get_node_shared_ptr()->set_friendly_name(parent.get_node_shared_ptr()->get_name() + "_new"); + auto convert = dequantization.convert->clone_with_new_inputs({ parent }); + convert->set_friendly_name(""); + copy_runtime_info(parent.get_node_shared_ptr(), convert); + parent = convert->output(0); } if (dequantization.subtract != nullptr) { @@ -537,15 +595,19 @@ std::shared_ptr NetworkHelper::separateInStandaloneBranch(std::sha outputs.push_back(input.get_source_output()); } - parent = dequantization.subtract->clone_with_new_inputs({parent, parentOnWeights->clone_with_new_inputs(outputs) }); - parent.get_node_shared_ptr()->set_friendly_name(parent.get_node_shared_ptr()->get_name() + "_new"); + auto subtract = dequantization.subtract->clone_with_new_inputs({parent, 
parentOnWeights->clone_with_new_inputs(outputs) }); + subtract->set_friendly_name(""); + copy_runtime_info(parent.get_node_shared_ptr(), subtract); + parent = subtract->output(0); } if (dequantization.multiply != nullptr) { - parent = dequantization.multiply->clone_with_new_inputs({ + auto multiply = dequantization.multiply->clone_with_new_inputs({ parent, dequantization.multiply->get_input_node_shared_ptr(1)->clone_with_new_inputs({}) }); - parent.get_node_shared_ptr()->set_friendly_name(parent.get_node_shared_ptr()->get_name() + "_new"); + multiply->set_friendly_name(""); + copy_runtime_info(parent.get_node_shared_ptr(), multiply); + parent = multiply->output(0); } std::vector> inputs = node->input_values(); @@ -556,7 +618,7 @@ std::shared_ptr NetworkHelper::separateInStandaloneBranch(std::sha const size_t inputIndex = NetworkHelper::getChildInputIndex(originalParent, node); inputs[inputIndex] = parent; const std::shared_ptr newNode = node->clone_with_new_inputs(inputs); - + copy_runtime_info(node, newNode); replace_node(node, newNode); newNode->set_friendly_name(node->get_friendly_name()); @@ -592,10 +654,49 @@ std::shared_ptr NetworkHelper::fuseConvert(const std::shar fakeQuantize->get_levels()); NetworkHelper::setOutDataPrecisionForTypeRelaxed(newFakeQuantize, node->get_output_element_type(0)); replace_node(node->shared_from_this(), newFakeQuantize); - newFakeQuantize->set_friendly_name(fakeQuantize->get_friendly_name()); + NetworkHelper::copyInfo(fakeQuantize, newFakeQuantize); + return newFakeQuantize; } +bool NetworkHelper::isPrecisionPreserved(const std::shared_ptr& node) { + auto& rt = node->get_rt_info(); + auto it = rt.find(ngraph::VariantWrapper::type_info.name); + if (it == rt.end()) { + return false; + } + auto attribute = std::dynamic_pointer_cast>(it->second); + assert(attribute != nullptr); + return attribute->get()->sharedValue->value; +} + +size_t NetworkHelper::calculateLevels( + const float dataPrecisionMin, + const float dataPrecisionMax, + const float combinedIntervalLow, + const float combinedIntervalHigh, + const float minIntervalLow, + const float minIntervalHigh, + float& dequantizationMul, + float& dequantizationSub, + float& updatedOutputLowValue, + float& updatedOutputHighValue) { + const float maxOutputInterval = combinedIntervalHigh - combinedIntervalLow; + // FQ -> SUB_quantization -> MUL_quantization -[INT8]-> SUB_dequantization -> MUL_dequantization -> + const float quantizationMul = (dataPrecisionMax - dataPrecisionMin) / maxOutputInterval; + dequantizationMul = maxOutputInterval / (dataPrecisionMax - dataPrecisionMin); + + // FQ outputLowValue = dataPrecision.min * dequantizationMul - quantizationSub + const float quantizationSub = combinedIntervalLow - dataPrecisionMin * dequantizationMul; + dequantizationSub = std::round(-quantizationSub * quantizationMul); + + updatedOutputLowValue = (minIntervalLow - quantizationSub) * quantizationMul; + updatedOutputHighValue = (minIntervalHigh - quantizationSub) * quantizationMul; + + const size_t levels = static_cast(fabs(roundf(updatedOutputHighValue) - roundf(updatedOutputLowValue)) + 1.0); + return levels; +} + std::shared_ptr NetworkHelper::foldFakeQuantize( const std::shared_ptr& fq, const bool roundValuesArg, @@ -772,7 +873,8 @@ std::shared_ptr NetworkHelper::composeFakeQuantize(const s newFakeQuantize->get_levels(), newFakeQuantize->get_auto_broadcast()); replace_node(dequantization.convert, replacement); - replacement->set_friendly_name(newFakeQuantize->get_friendly_name()); + 
//replacement->set_friendly_name(newFakeQuantize->get_friendly_name()); + copyInfo({ fakeQuantize, dequantization.convert }, replacement); NetworkHelper::setOutDataPrecisionForTypeRelaxed(replacement, dequantization.convert->output(0).get_element_type()); newFakeQuantize = replacement; } @@ -791,7 +893,8 @@ std::shared_ptr NetworkHelper::composeFakeQuantize(const s newFakeQuantize->get_levels(), newFakeQuantize->get_auto_broadcast()); replace_node(dequantization.subtract, replacement); - replacement->set_friendly_name(newFakeQuantize->get_friendly_name()); + //replacement->set_friendly_name(newFakeQuantize->get_friendly_name()); + copyInfo({ newFakeQuantize, dequantization.subtract }, replacement); newFakeQuantize = replacement; } @@ -827,7 +930,8 @@ std::shared_ptr NetworkHelper::composeFakeQuantize(const s newFakeQuantize->get_auto_broadcast()); replace_node(dequantization.multiply, replacement); - replacement->set_friendly_name(newFakeQuantize->get_friendly_name()); + //replacement->set_friendly_name(newFakeQuantize->get_friendly_name()); + copyInfo({ newFakeQuantize, dequantization.multiply }, replacement); newFakeQuantize = replacement; } @@ -872,6 +976,12 @@ std::tuple, std::shared_ptr> NetworkHelper::decompos } } + if ((!updatePrecision) && + std::all_of(scales.begin(), scales.end(), [](const float value) { return value == 1.f; }) && + std::all_of(shifts.begin(), shifts.end(), [](const float value) { return value == 0.f; })) { + return std::make_tuple(nullptr, nullptr); + } + std::shared_ptr shift = hasZeroPoint ? std::make_shared(deqPrecision, outputLow.get_shape(), shifts) : nullptr; @@ -980,7 +1090,8 @@ std::shared_ptr NetworkHelper::updateFakeQuantize( std::shared_ptr fq, element::Type precision, float min, - float max) { + float max, + const bool replace) { auto newMin = std::make_shared(fq->get_output_element_type(0), Shape{}, min); auto newMax = std::make_shared(fq->get_output_element_type(0), Shape{}, max); @@ -994,7 +1105,9 @@ std::shared_ptr NetworkHelper::updateFakeQuantize( fq->get_auto_broadcast()); NetworkHelper::setOutDataPrecision(newFQ, precision); - replace_node(fq, newFQ); + if (replace) { + replace_node(fq, newFQ); + } newFQ->set_friendly_name(fq->get_friendly_name()); return newFQ; @@ -1006,9 +1119,12 @@ FakeQuantizeDequantization NetworkHelper::makeDequantization( const ngraph::element::Type originalPrecision, const ngraph::PartialShape dataNodeOutputShape, element::Type precision, - const ngraph::element::Type deqPrecision) { - // TODO: we create input here! we really need it here? - const std::shared_ptr input = std::make_shared(precision, dataNodeOutputShape); + const ngraph::element::Type deqPrecision, + std::shared_ptr input) { + if (input == nullptr) { + // TODO: we create input here! we really need it here? 
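            // `input` is the new optional parameter: callers that already have a
            // producer node pass it in, and the dequantization chain
            // (Convert -> Subtract -> Multiply) is built directly on top of it;
            // only when it is absent is a placeholder Parameter synthesized here.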
+ input = std::make_shared(precision, dataNodeOutputShape); + } std::shared_ptr parent = input; std::shared_ptr convert; @@ -1016,7 +1132,7 @@ FakeQuantizeDequantization NetworkHelper::makeDequantization( convert = nullptr; } else { convert = std::make_shared( - input, + parent, deqPrecision); parent = convert; } @@ -1212,11 +1328,20 @@ FakeQuantizeDequantization NetworkHelper::getDequantization(const std::shared_pt return FakeQuantizeDequantization(dataNode, convert, subtract, subtractConvert, subtractConstant, multiply, multiplyConstant); } -FakeQuantizeDequantization NetworkHelper::getDequantizationBelow(const std::shared_ptr& node) { +FakeQuantizeDequantization NetworkHelper::getDequantizationBelow(const std::shared_ptr& node, const bool convertIsMandatory) { const Output dataNode = node->output(0); - std::shared_ptr lastNode = dataNode.get_target_inputs().begin()->get_node()->shared_from_this(); + const auto& targetInputs = dataNode.get_target_inputs(); + if (targetInputs.size() == 0ul) { + return FakeQuantizeDequantization(); + } + + std::shared_ptr lastNode = targetInputs.begin()->get_node()->shared_from_this(); const std::shared_ptr convert = as_type_ptr(lastNode); + if (convertIsMandatory && (convert == nullptr)) { + return FakeQuantizeDequantization(); + } + if (convert != nullptr) { if ((convert->input(0).get_element_type() != element::i8) && (convert->input(0).get_element_type() != element::u8) && (convert->output(0).get_element_type() != element::f32)) { @@ -1466,11 +1591,13 @@ NetworkHelper::InsertDequantizationResult NetworkHelper::moveDequantizationAfter dequantization.subtractConstant->output(0).get_element_type(); } - parent = std::make_shared( - parent, - dequantization.subtractConstant->output(0).get_element_type() == parentPrecision ? - dequantization.subtractConstant : - foldConvert(dequantization.subtractConstant, parentPrecision)); + parent = std::make_shared>( + std::vector{element::f32, element::f32}, std::vector{ element::f32 }, + ngraph::op::TemporaryReplaceOutputType(parent, element::f32).get(), + ngraph::op::TemporaryReplaceOutputType( + dequantization.subtractConstant->output(0).get_element_type() == parentPrecision ? 
+ dequantization.subtractConstant : + foldConvert(dequantization.subtractConstant, parentPrecision), element::f32).get()); ngraph::copy_runtime_info({ newOperation, parent }, parent); } else { parent = std::make_shared(parent, dequantization.subtractConvert); @@ -1594,8 +1721,8 @@ bool NetworkHelper::checkZeroPoint(const std::shared_ptr& node, const Data } } const auto subtractValues = subtractConst->cast_vector(); - if (std::any_of(subtractValues.begin(), subtractValues.end(), [min, max] (const float& val) { - return (val < min) || (val > max); })) { + if (std::any_of(subtractValues.begin(), subtractValues.end(), [min, max](const float& val) { + return (val < min) || (val > max); })) { return false; } } else if (is_type(node)) { @@ -1605,12 +1732,12 @@ bool NetworkHelper::checkZeroPoint(const std::shared_ptr& node, const Data min = dataPrecision.min - 0.5f; max = dataPrecision.max + 0.5f; const auto quantizationDetails = QuantizationDetails::getDetails(as_type_ptr(node)); - for (size_t i = 0; i < quantizationDetails.outputIntervalsCount; ++i) { + for (size_t i = 0; i < quantizationDetails.outputLowValues.size(); ++i) { float shift; if (quantizationDetails.outputHighValues[i] != quantizationDetails.outputLowValues[i]) { shift = (dataPrecision.min * quantizationDetails.outputHighValues[i] - - dataPrecision.max * quantizationDetails.outputLowValues[i]) / - (quantizationDetails.outputHighValues[i] - quantizationDetails.outputLowValues[i]); + dataPrecision.max * quantizationDetails.outputLowValues[i]) / + (quantizationDetails.outputHighValues[i] - quantizationDetails.outputLowValues[i]); } else { shift = 0.f; } @@ -1619,6 +1746,7 @@ bool NetworkHelper::checkZeroPoint(const std::shared_ptr& node, const Data } } } + return true; } @@ -1705,6 +1833,23 @@ bool NetworkHelper::isDQByDynamicDimension(const std::shared_ptr& layer, s return false; } -} // namespace low_precision -} // namespace pass -} // namespace ngraph +bool isDisabled(const std::shared_ptr& node) { + for (const auto& input : node->inputs()) { + auto precisionAttribute = getAttribute>(input); + if (precisionAttribute == nullptr) { + continue; + } + + assert(precisionAttribute->get() != nullptr); + assert(precisionAttribute->get()->sharedValue != nullptr); + + const auto& precisionRestrictions = precisionAttribute->get()->sharedValue->precisions; + if (precisionRestrictions.empty()) { + return true; + } + } + return false; +} +} // namespace low_precision +} // namespace pass +} // namespace ngraph diff --git a/inference-engine/src/low_precision_transformations/src/normalize_l2.cpp b/inference-engine/src/low_precision_transformations/src/normalize_l2.cpp index 474602166751af..0ec9876e309a7d 100644 --- a/inference-engine/src/low_precision_transformations/src/normalize_l2.cpp +++ b/inference-engine/src/low_precision_transformations/src/normalize_l2.cpp @@ -9,6 +9,8 @@ #include #include +#include + #include "ngraph/type/element_type.hpp" #include "ngraph/type/element_type_traits.hpp" #include "low_precision/network_helper.hpp" @@ -18,6 +20,8 @@ using namespace ngraph; using namespace ngraph::pass; using namespace ngraph::pass::low_precision; +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::NormalizeL2Transformation, "NormalizeL2Transformation", 0); + namespace normalize_l2 { template @@ -35,6 +39,21 @@ std::shared_ptr createNewScalesConst(const ngraph::op::Con } // namespace normalize_l2 +NormalizeL2Transformation::NormalizeL2Transformation(const Params& params) : LayerTransformation(params) { + auto matcher = pattern::wrap_type({ 
pattern::wrap_type(), pattern::wrap_type() }); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; + + auto m = std::make_shared(matcher, "NormalizeL2Transformation"); + this->register_matcher(m, callback); +} + bool NormalizeL2Transformation::canBeTransformed(const TransformationContext& context, std::shared_ptr operation) const { if (!LayerTransformation::canBeTransformed(context, operation)) { return false; @@ -79,17 +98,7 @@ bool NormalizeL2Transformation::canBeTransformed(const TransformationContext& co return true; } -void NormalizeL2Transformation::registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const { - addPattern( - pass, - context, - make_op_pattern({ - make_op_label(), - make_op_label() - })); -} - -bool NormalizeL2Transformation::transform(TransformationContext &context, ngraph::pattern::Matcher &m) const { +bool NormalizeL2Transformation::transform(TransformationContext &context, ngraph::pattern::Matcher &m) { std::shared_ptr operation = m.get_match_root(); if (!canBeTransformed(context, operation)) { return false; diff --git a/inference-engine/src/low_precision_transformations/src/prelu.cpp b/inference-engine/src/low_precision_transformations/src/prelu.cpp index 797d2d1dbfb389..17827ef9f712c7 100644 --- a/inference-engine/src/low_precision_transformations/src/prelu.cpp +++ b/inference-engine/src/low_precision_transformations/src/prelu.cpp @@ -8,6 +8,8 @@ #include #include +#include + #include "low_precision/common/ie_lpt_exception.hpp" #include "low_precision/network_helper.hpp" @@ -15,14 +17,24 @@ namespace ngraph { namespace pass { namespace low_precision { -void PReluTransformation::registerMatcherIn(GraphRewrite &pass, TransformationContext &context) const { - addPattern( - pass, - context, - make_op_pattern({ make_op_label(), make_op_label() })); +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::PReluTransformation, "PReluTransformation", 0); + +PReluTransformation::PReluTransformation(const Params& params) : LayerTransformation(params) { + auto matcher = pattern::wrap_type({ pattern::wrap_type(), pattern::wrap_type() }); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; + + auto m = std::make_shared(matcher, "PReluTransformation"); + this->register_matcher(m, callback); } -bool PReluTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const { +bool PReluTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) { std::shared_ptr prelu = m.get_match_root(); if (!canBeTransformed(context, prelu)) { return false; diff --git a/inference-engine/src/low_precision_transformations/src/propagate_precisions.cpp b/inference-engine/src/low_precision_transformations/src/propagate_precisions.cpp new file mode 100644 index 00000000000000..4b15dd7e7b922f --- /dev/null +++ b/inference-engine/src/low_precision_transformations/src/propagate_precisions.cpp @@ -0,0 +1,29 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "low_precision/propagate_precisions.hpp" + +#include + +#include +#include +#include "low_precision/rt_info/precisions_attribute.hpp" +#include "low_precision/propagate_through_precision_preserved.hpp" +#include 
"low_precision/propagate_to_input.hpp" + +using namespace ngraph; +using namespace ngraph::pass::low_precision; + +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::PropagatePrecisions, "PropagatePrecisions", 0); + +bool ngraph::pass::low_precision::PropagatePrecisions::run_on_function(std::shared_ptr f) { + ngraph::pass::Manager manager; + manager.set_per_pass_validation(false); + std::shared_ptr precisionsPropagation = manager.register_pass(); + precisionsPropagation->add_matcher>(AttributeSource::OutputPort); + precisionsPropagation->add_matcher>(); + precisionsPropagation->add_matcher>(); + manager.run_passes(f); + return false; +} diff --git a/inference-engine/src/low_precision_transformations/src/quantization_details.cpp b/inference-engine/src/low_precision_transformations/src/quantization_details.cpp index ed8ef754102384..ca97aae0dc3e2c 100644 --- a/inference-engine/src/low_precision_transformations/src/quantization_details.cpp +++ b/inference-engine/src/low_precision_transformations/src/quantization_details.cpp @@ -15,6 +15,8 @@ #include #include +#include "low_precision/lpt_itt.hpp" + #include #include @@ -27,130 +29,80 @@ QuantizationDetails::QuantizationDetails() inputLowValues({}), inputHighValues({}), outputLowValues({}), - outputHighValues({}), - inputIntervalsCount(0), - outputIntervalsCount(0), - outputChannelsCount(0) {} + outputHighValues({}) {} QuantizationDetails::QuantizationDetails(const QuantizationDetails& quantizationDetails) : levels(quantizationDetails.levels), inputLowValues(quantizationDetails.inputLowValues), inputHighValues(quantizationDetails.inputHighValues), outputLowValues(quantizationDetails.outputLowValues), - outputHighValues(quantizationDetails.outputHighValues), - inputIntervalsCount(quantizationDetails.inputIntervalsCount), - outputIntervalsCount(quantizationDetails.outputIntervalsCount), - outputChannelsCount(quantizationDetails.outputChannelsCount) {} + outputHighValues(quantizationDetails.outputHighValues) {} QuantizationDetails::QuantizationDetails(const size_t levels, const std::vector& inputLowValues, const std::vector& inputHighValues, const std::vector& outputLowValues, - const std::vector& outputHighValues, const size_t inputIntervalsCount, - const size_t outputIntervalsCount, const size_t outputChannelsCount) + const std::vector& outputHighValues) : levels(levels), inputLowValues(inputLowValues), inputHighValues(inputHighValues), outputLowValues(outputLowValues), - outputHighValues(outputHighValues), - inputIntervalsCount(inputIntervalsCount), - outputIntervalsCount(outputIntervalsCount), - outputChannelsCount(outputChannelsCount) {} + outputHighValues(outputHighValues) {} bool QuantizationDetails::outputLayoutIsSupported(std::shared_ptr quantize) { - if (!is_type(quantize->get_input_node_ptr(1)) || - !is_type(quantize->get_input_node_ptr(2)) || - !is_type(quantize->get_input_node_ptr(3)) || - !is_type(quantize->get_input_node_ptr(4))) { - return false; - } - - const size_t inputLowValuesSize = as_type_ptr(quantize->get_input_node_shared_ptr(1))->cast_vector().size(); - const size_t inputHighValuesSize = as_type_ptr(quantize->get_input_node_shared_ptr(2))->cast_vector().size(); - if (inputLowValuesSize != inputHighValuesSize) { - return false; - } - - const size_t outputLowValuesSize = as_type_ptr(quantize->get_input_node_shared_ptr(3))->cast_vector().size(); - const size_t outputHighValuesSize = as_type_ptr(quantize->get_input_node_shared_ptr(4))->cast_vector().size(); - if (outputLowValuesSize != outputHighValuesSize) { - return false; 
-    }
-
-    return true;
+    return is_type<opset1::Constant>(quantize->get_input_node_ptr(1)) &&
+        is_type<opset1::Constant>(quantize->get_input_node_ptr(2)) &&
+        is_type<opset1::Constant>(quantize->get_input_node_ptr(3)) &&
+        is_type<opset1::Constant>(quantize->get_input_node_ptr(4));
 }

 void QuantizationDetails::getInputIntervals(
     std::shared_ptr<opset1::FakeQuantize> quantize,
     std::vector<float>& inputLowValues,
-    std::vector<float>& inputHighValues,
-    size_t& inputIntervalsCount) {
+    std::vector<float>& inputHighValues) {
     std::shared_ptr<opset1::Constant> inputLowLayer = as_type_ptr<opset1::Constant>(quantize->get_input_node_shared_ptr(1));
-    validate(inputLowLayer);
     const std::vector<float>& inputLowBlobValues = getBlobValue(inputLowLayer);
     inputLowValues.insert(inputLowValues.end(), inputLowBlobValues.begin(), inputLowBlobValues.end());

     std::shared_ptr<opset1::Constant> inputHighLayer = as_type_ptr<opset1::Constant>(quantize->get_input_node_shared_ptr(2));
-    validate(inputHighLayer);
     const std::vector<float> inputHighBlobValues = getBlobValue(inputHighLayer);
     inputHighValues.insert(inputHighValues.end(), inputHighBlobValues.begin(), inputHighBlobValues.end());

     if (inputLowValues.size() != inputHighValues.size()) {
         THROW_IE_LPT_EXCEPTION(*quantize) << "Quantize input values sizes are not equal for layer " << quantize->get_friendly_name();
     }
-
-    inputIntervalsCount = inputLowValues.size();
 }

 void QuantizationDetails::getOutputIntervals(
     std::shared_ptr<opset1::FakeQuantize> quantize,
     std::vector<float>& outputLowValues,
-    std::vector<float>& outputHighValues,
-    size_t& outputIntervalsCount) {
+    std::vector<float>& outputHighValues) {
     std::shared_ptr<opset1::Constant> outputLowLayer = as_type_ptr<opset1::Constant>(quantize->get_input_node_shared_ptr(3));
-    validate(outputLowLayer);
     const std::vector<float>& outputLowBlobValues = getBlobValue(outputLowLayer);
     outputLowValues.insert(outputLowValues.end(), outputLowBlobValues.begin(), outputLowBlobValues.end());

     std::shared_ptr<opset1::Constant> outputHighLayer = as_type_ptr<opset1::Constant>(quantize->get_input_node_shared_ptr(4));
-    validate(outputHighLayer);
     const std::vector<float> outputHighBlobValues = getBlobValue(outputHighLayer);
     outputHighValues.insert(outputHighValues.end(), outputHighBlobValues.begin(), outputHighBlobValues.end());

     if (outputLowValues.size() != outputHighValues.size()) {
         THROW_IE_LPT_EXCEPTION(*quantize) << "Quantize output values sizes are not equal for layer " << quantize->get_friendly_name();
     }
-
-    outputIntervalsCount = outputLowValues.size();
 }

-
 QuantizationDetails QuantizationDetails::getDetails(std::shared_ptr<opset1::FakeQuantize> quantize) {
-    std::vector<float> inputLowValues;
-    std::vector<float> inputHighValues;
-    size_t inputIntervalsCount;
-    getInputIntervals(quantize, inputLowValues, inputHighValues, inputIntervalsCount);
-
-    std::vector<float> outputLowValues;
-    std::vector<float> outputHighValues;
-    size_t outputIntervalsCount;
-    getOutputIntervals(quantize, outputLowValues, outputHighValues, outputIntervalsCount);
-
-    const size_t outputChannelsCount = outputLowValues.size() == 1ul ? 1ul :
-        NetworkHelper::getOutputChannelsCount(quantize, NetworkHelper::isConstantPath(quantize));
-    if (!outputLayoutIsSupported(quantize)) {
-        THROW_IE_LPT_EXCEPTION(*quantize) << "Expected output channels count " << outputIntervalsCount << " but found " << outputChannelsCount;
-    }
+    const std::vector<float> inputLowValues = as_type_ptr<opset1::Constant>(quantize->get_input_node_shared_ptr(1))->cast_vector<float>();
+    const std::vector<float> inputHighValues = as_type_ptr<opset1::Constant>(quantize->get_input_node_shared_ptr(2))->cast_vector<float>();
+
+    const std::vector<float> outputLowValues = as_type_ptr<opset1::Constant>(quantize->get_input_node_shared_ptr(3))->cast_vector<float>();
+    const std::vector<float> outputHighValues = as_type_ptr<opset1::Constant>(quantize->get_input_node_shared_ptr(4))->cast_vector<float>();

     return QuantizationDetails(
-        quantize->get_levels(),
-        inputLowValues,
-        inputHighValues,
-        outputLowValues,
-        outputHighValues,
-        inputIntervalsCount,
-        outputIntervalsCount,
-        outputChannelsCount);
+        quantize->get_levels(),
+        inputLowValues,
+        inputHighValues,
+        outputLowValues,
+        outputHighValues);
 }

 bool QuantizationDetails::hasNegativeOutput() const {
@@ -181,63 +133,20 @@ float QuantizationDetails::maxInput(const size_t channel) const {
     return value;
 }

-float QuantizationDetails::maxOutputHigh() const {
-    float output = getOutputHighValue(0);
-    for (size_t channel = 1; channel < outputIntervalsCount; ++channel) {
-        if (output < getOutputHighValue(channel)) {
-            output = getOutputHighValue(channel);
-        }
-    }
-    return output;
-}
-
-float QuantizationDetails::minOutputLow() const {
-    float output = getOutputLowValue(0);
-    for (size_t channel = 1; channel < outputIntervalsCount; ++channel) {
-        if (output > getOutputLowValue(channel)) {
-            output = getOutputLowValue(channel);
-        }
-    }
-    return output;
-}
-
-float QuantizationDetails::getInputLowValue(const size_t channel) const {
-    if ((inputIntervalsCount != 1) && (channel >= inputIntervalsCount)) {
-        THROW_TRANSFORMATION_EXCEPTION << "channel " << channel << " is out of bound, input channels count " << inputIntervalsCount;
-    }
-    const float value = inputLowValues.size() == 1 ? inputLowValues[0] : inputLowValues[channel];
-    return value;
-}
-
-float QuantizationDetails::getInputHighValue(const size_t channel) const {
-    if ((inputIntervalsCount != 1) && (channel >= inputIntervalsCount)) {
-        THROW_TRANSFORMATION_EXCEPTION << "channel " << channel << " is out of bound, input channels count " << inputIntervalsCount;
-    }
-    const float value = inputHighValues.size() == 1 ? inputHighValues[0] : inputHighValues[channel];
-    return value;
+float QuantizationDetails::getInputLowValue(const size_t index) const {
+    return inputLowValues.size() == 1ul ? inputLowValues[0] : inputLowValues[index];
 }

-float QuantizationDetails::getOutputLowValue(const size_t channel) const {
-    if ((outputIntervalsCount != 1) && (channel >= outputIntervalsCount)) {
-        THROW_TRANSFORMATION_EXCEPTION << "channel " << channel << " is out of bound, output channels count "
-                                       << outputIntervalsCount;
-    }
-    const float value = outputLowValues.size() == 1 ? outputLowValues[0] : outputLowValues[channel];
-    return value;
+float QuantizationDetails::getInputHighValue(const size_t index) const {
+    return inputHighValues.size() == 1ul ? inputHighValues[0] : inputHighValues[index];
 }

-float QuantizationDetails::getOutputHighValue(const size_t channel) const {
-    if ((outputIntervalsCount != 1) && (channel >= outputIntervalsCount)) {
-        THROW_TRANSFORMATION_EXCEPTION << "channel " << channel << " is out of bound, output channels count "
-                                       << outputIntervalsCount;
-    }
-    const float value = outputHighValues.size() == 1 ? outputHighValues[0] : outputHighValues[channel];
-    return value;
+float QuantizationDetails::getOutputLowValue(const size_t index) const {
+    return outputLowValues.size() == 1ul ? outputLowValues[0] : outputLowValues[index];
 }

-void QuantizationDetails::validate(std::shared_ptr<Node> constantLayer) {
-    // nothing to validate
-    // TODO: remove?
+float QuantizationDetails::getOutputHighValue(const size_t index) const {
+    return outputHighValues.size() == 1ul ? outputHighValues[0] : outputHighValues[index];
 }

 std::vector<float> QuantizationDetails::getBlobValue(std::shared_ptr<Node> constantLayer) {
diff --git a/inference-engine/src/low_precision_transformations/src/reduce_base_transformation.cpp b/inference-engine/src/low_precision_transformations/src/reduce_base_transformation.cpp
index d79be9f6e5416f..e178d94b98a090 100644
--- a/inference-engine/src/low_precision_transformations/src/reduce_base_transformation.cpp
+++ b/inference-engine/src/low_precision_transformations/src/reduce_base_transformation.cpp
@@ -13,7 +13,7 @@ namespace low_precision {

 ReduceBaseTransformation::ReduceBaseTransformation(const Params& params) : LayerTransformation(params) {}

-bool ReduceBaseTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher& m) const {
+bool ReduceBaseTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher& m) {
     if (!canBeTransformed(context, m.get_match_root())) {
         return false;
     }
diff --git a/inference-engine/src/low_precision_transformations/src/reduce_max.cpp b/inference-engine/src/low_precision_transformations/src/reduce_max.cpp
index e5c039d9fc2869..29e230314e72d9 100644
--- a/inference-engine/src/low_precision_transformations/src/reduce_max.cpp
+++ b/inference-engine/src/low_precision_transformations/src/reduce_max.cpp
@@ -5,18 +5,29 @@
 #include "low_precision/reduce_max.hpp"
 #include
 #include
+#include
+
 #include "low_precision/network_helper.hpp"

 namespace ngraph {
 namespace pass {
 namespace low_precision {

-ReduceMaxTransformation::ReduceMaxTransformation(const Params& params) : ReduceBaseTransformation(params) {}
+NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::ReduceMaxTransformation, "ReduceMaxTransformation", 0);
+
+ReduceMaxTransformation::ReduceMaxTransformation(const Params& params) : ReduceBaseTransformation(params) {
+    auto matcher = pattern::wrap_type<opset1::ReduceMax>({ pattern::wrap_type<opset1::Multiply>(), pattern::wrap_type<opset1::Constant>() });
+
+    ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
+        auto op = m.get_match_root();
+        if (transformation_callback(op)) {
+            return false;
+        }
+        return transform(*context, m);
+    };

-void ReduceMaxTransformation::registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const {
-    addPattern(pass,
-               context,
-               make_op_pattern<opset1::ReduceMax>({ make_op_label<opset1::Multiply>(), make_op_label<opset1::Constant>() }));
+    auto m = std::make_shared<ngraph::pattern::Matcher>(matcher, "ReduceMaxTransformation");
+    this->register_matcher(m, callback);
 }

 bool ReduceMaxTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr<Node> reduce) const {
diff --git a/inference-engine/src/low_precision_transformations/src/reduce_mean.cpp
b/inference-engine/src/low_precision_transformations/src/reduce_mean.cpp index deb5b5237d1170..c91abbeb1ccc9e 100644 --- a/inference-engine/src/low_precision_transformations/src/reduce_mean.cpp +++ b/inference-engine/src/low_precision_transformations/src/reduce_mean.cpp @@ -5,18 +5,29 @@ #include "low_precision/reduce_mean.hpp" #include #include +#include + #include "low_precision/network_helper.hpp" namespace ngraph { namespace pass { namespace low_precision { -ReduceMeanTransformation::ReduceMeanTransformation(const Params& params) : ReduceBaseTransformation(params) {} +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::ReduceMeanTransformation, "ReduceMeanTransformation", 0); + +ReduceMeanTransformation::ReduceMeanTransformation(const Params& params) : ReduceBaseTransformation(params) { + auto matcher = pattern::wrap_type({ pattern::wrap_type(), pattern::wrap_type() }); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; -void ReduceMeanTransformation::registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const { - addPattern(pass, - context, - make_op_pattern({ make_op_label(), make_op_label() })); + auto m = std::make_shared(matcher, "ReduceMeanTransformation"); + this->register_matcher(m, callback); } bool ReduceMeanTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr reduce) const { diff --git a/inference-engine/src/low_precision_transformations/src/reduce_min.cpp b/inference-engine/src/low_precision_transformations/src/reduce_min.cpp index 8e8d7ef031498d..1d0e9da5accddc 100644 --- a/inference-engine/src/low_precision_transformations/src/reduce_min.cpp +++ b/inference-engine/src/low_precision_transformations/src/reduce_min.cpp @@ -5,18 +5,29 @@ #include "low_precision/reduce_min.hpp" #include #include +#include + #include "low_precision/network_helper.hpp" namespace ngraph { namespace pass { namespace low_precision { -ReduceMinTransformation::ReduceMinTransformation(const Params& params) : ReduceBaseTransformation(params) {} +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::ReduceMinTransformation, "ReduceMinTransformation", 0); + +ReduceMinTransformation::ReduceMinTransformation(const Params& params) : ReduceBaseTransformation(params) { + auto matcher = pattern::wrap_type({ pattern::wrap_type(), pattern::wrap_type() }); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; -void ReduceMinTransformation::registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const { - addPattern(pass, - context, - make_op_pattern({ make_op_label(), make_op_label() })); + auto m = std::make_shared(matcher, "ReduceMinTransformation"); + this->register_matcher(m, callback); } bool ReduceMinTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr reduce) const { diff --git a/inference-engine/src/low_precision_transformations/src/reduce_sum.cpp b/inference-engine/src/low_precision_transformations/src/reduce_sum.cpp index 5ad65d782186f4..7ffcb435bd0895 100644 --- a/inference-engine/src/low_precision_transformations/src/reduce_sum.cpp +++ b/inference-engine/src/low_precision_transformations/src/reduce_sum.cpp @@ -5,18 +5,29 @@ #include "low_precision/reduce_sum.hpp" #include #include +#include + #include 
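Two idioms recur throughout these files, so here is each in readable form once; both sketches use assumed opset1 and template arguments (the patch itself does not spell them out). First, every layer transformation becomes a self-registering matcher pass: NGRAPH_RTTI_DEFINITION supplies type info, and the constructor builds the pattern and callback that registerMatcherIn used to add:

NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::ReduceMeanTransformation, "ReduceMeanTransformation", 0);

ReduceMeanTransformation::ReduceMeanTransformation(const Params& params) : ReduceBaseTransformation(params) {
    // Match ReduceMean(dequantization Multiply, axes Constant); the opset1 operand types are assumptions.
    auto matcher = pattern::wrap_type<opset1::ReduceMean>({ pattern::wrap_type<opset1::Multiply>(), pattern::wrap_type<opset1::Constant>() });

    ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
        auto op = m.get_match_root();
        if (transformation_callback(op)) {   // plugin-side veto: leave this node untouched
            return false;
        }
        return transform(*context, m);
    };

    this->register_matcher(std::make_shared<ngraph::pattern::Matcher>(matcher, "ReduceMeanTransformation"), callback);
}

Second, function-level passes compose such matchers into one GraphRewrite, as PropagatePrecisions does; in spirit, with the matcher class names taken from the included header names (propagate_to_input.hpp and friends) rather than confirmed by this patch:

ngraph::pass::Manager manager;
manager.set_per_pass_validation(false);
auto propagation = manager.register_pass<ngraph::pass::GraphRewrite>();
// Create precision attributes at FakeQuantize outputs, carry them through
// precision-preserving ops, then push them onto consumer inputs; the exact
// template arguments here are assumptions.
propagation->add_matcher<low_precision::CreateAttribute<PrecisionsAttributePtr, opset1::FakeQuantize>>(AttributeSource::OutputPort);
propagation->add_matcher<low_precision::PropagateThroughPrecisionPreserved<PrecisionsAttributePtr>>();
propagation->add_matcher<low_precision::PropagateToInput<PrecisionsAttributePtr>>();
manager.run_passes(f);   // f: the std::shared_ptr<ngraph::Function> being transformed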
"low_precision/network_helper.hpp" namespace ngraph { namespace pass { namespace low_precision { -ReduceSumTransformation::ReduceSumTransformation(const Params& params) : ReduceBaseTransformation(params) {} +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::ReduceSumTransformation, "ReduceSumTransformation", 0); + +ReduceSumTransformation::ReduceSumTransformation(const Params& params) : ReduceBaseTransformation(params) { + auto matcher = pattern::wrap_type({ pattern::wrap_type(), pattern::wrap_type() }); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; -void ReduceSumTransformation::registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const { - addPattern(pass, - context, - make_op_pattern({ make_op_label(), make_op_label() })); + auto m = std::make_shared(matcher, "ReduceSumTransformation"); + this->register_matcher(m, callback); } bool ReduceSumTransformation::canBeTransformed(const TransformationContext& context, std::shared_ptr reduce) const { diff --git a/inference-engine/src/low_precision_transformations/src/relu.cpp b/inference-engine/src/low_precision_transformations/src/relu.cpp index 0a0b79bebad517..0c9f43c37e9487 100644 --- a/inference-engine/src/low_precision_transformations/src/relu.cpp +++ b/inference-engine/src/low_precision_transformations/src/relu.cpp @@ -8,6 +8,8 @@ #include #include +#include + #include "low_precision/common/ie_lpt_exception.hpp" #include "low_precision/network_helper.hpp" @@ -15,14 +17,24 @@ namespace ngraph { namespace pass { namespace low_precision { -void ReluTransformation::registerMatcherIn(GraphRewrite &pass, TransformationContext &context) const { - addPattern( - pass, - context, - make_op_pattern({ make_op_label()})); +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::ReluTransformation, "ReluTransformation", 0); + +ReluTransformation::ReluTransformation(const Params& params) : LayerTransformation(params) { + auto matcher = pattern::wrap_type({ pattern::wrap_type() }); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; + + auto m = std::make_shared(matcher, "ReluTransformation"); + this->register_matcher(m, callback); } -bool ReluTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const { +bool ReluTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) { std::shared_ptr relu = m.get_match_root(); if (!canBeTransformed(context, relu)) { return false; diff --git a/inference-engine/src/low_precision_transformations/src/reshape.cpp b/inference-engine/src/low_precision_transformations/src/reshape.cpp index db751f58f2fb78..f478928537ee47 100644 --- a/inference-engine/src/low_precision_transformations/src/reshape.cpp +++ b/inference-engine/src/low_precision_transformations/src/reshape.cpp @@ -11,6 +11,8 @@ #include #include +#include + #include "low_precision/common/ie_lpt_exception.hpp" #include "low_precision/network_helper.hpp" @@ -18,11 +20,21 @@ namespace ngraph { namespace pass { namespace low_precision { -void ReshapeTransformation::registerMatcherIn(GraphRewrite &pass, TransformationContext &context) const { - addPattern( - pass, - context, - make_op_pattern({ make_op_label(), make_op_label() })); 
+NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::ReshapeTransformation, "ReshapeTransformation", 0); + +ReshapeTransformation::ReshapeTransformation(const Params& params) : LayerTransformation(params) { + auto matcher = pattern::wrap_type({ pattern::wrap_type(), pattern::wrap_type() }); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; + + auto m = std::make_shared(matcher, "ReshapeTransformation"); + this->register_matcher(m, callback); } void reshapeDequantizationConstant(const std::shared_ptr& reshape) { @@ -154,7 +166,7 @@ void reshapeDequantizationConstant(const std::shared_ptr& resha } } -bool ReshapeTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const { +bool ReshapeTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) { std::shared_ptr reshape = as_type_ptr(m.get_match_root()); if (NetworkHelper::isConstantPath(reshape)) { return false; @@ -204,6 +216,12 @@ bool ReshapeTransformation::canBeTransformed(const TransformationContext& contex return false; } + // TODO: LPT: to support current flow: #58269 + //if (((dequantization.subtractConstant != nullptr) && NetworkHelper::isScalarLike(dequantization.subtractConstant)) || + // ((dequantization.multiplyConstant != nullptr) && NetworkHelper::isScalarLike(dequantization.multiplyConstant))) { + // return true; + //} + const Shape subtractShape = dequantization.subtract == nullptr ? Shape{} : dequantization.subtractConstant->get_shape(); Shape subtractShapeWithBatch = subtractShape; const PartialShape inputPShape = op->get_input_partial_shape(0); diff --git a/inference-engine/src/low_precision_transformations/src/rt_info/avg_pool_precision_preserved_attribute.cpp b/inference-engine/src/low_precision_transformations/src/rt_info/avg_pool_precision_preserved_attribute.cpp new file mode 100644 index 00000000000000..3bafe518a91b01 --- /dev/null +++ b/inference-engine/src/low_precision_transformations/src/rt_info/avg_pool_precision_preserved_attribute.cpp @@ -0,0 +1,27 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "low_precision/rt_info/avg_pool_precision_preserved_attribute.hpp" + +#include +#include +#include + +using namespace ngraph; + +template class ngraph::VariantImpl; + +constexpr VariantTypeInfo VariantWrapper::type_info; + +void VariantWrapper::merge( + std::vector>>>& attributes) { +} + +std::string VariantWrapper::to_string() { + auto value = this->m_value; + std::stringstream ss; + ss << m_value->get_string(); + ss << "value: " << (value->sharedValue->value ? 
"true" : "false"); + return ss.str(); +} diff --git a/inference-engine/src/low_precision_transformations/src/rt_info/intervals_alignment_attribute.cpp b/inference-engine/src/low_precision_transformations/src/rt_info/intervals_alignment_attribute.cpp new file mode 100644 index 00000000000000..cb786a8af36e05 --- /dev/null +++ b/inference-engine/src/low_precision_transformations/src/rt_info/intervals_alignment_attribute.cpp @@ -0,0 +1,216 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "low_precision/rt_info/intervals_alignment_attribute.hpp" + +#include +#include +#include + +#include "low_precision/lpt_itt.hpp" +#include "low_precision/network_helper.hpp" + +using namespace ngraph; +using namespace ngraph::pass::low_precision; + +IntervalsAlignmentAttribute::IntervalsAlignmentAttribute( + const IntervalsAlignmentSharedValue::Interval combinedInterval, + size_t levels) : levels(levels) { + sharedValue = std::make_shared(combinedInterval, combinedInterval, levels); +} + +IntervalsAlignmentAttribute::IntervalsAlignmentAttribute( + const IntervalsAlignmentSharedValue::Interval combinedInterval, + const size_t levels, + const IntervalsAlignmentSharedValue::Interval minInterval, + const size_t minLevels) : levels(levels) { + sharedValue = std::make_shared(combinedInterval, minInterval, minLevels); +} + +template class ngraph::VariantImpl; + +constexpr VariantTypeInfo VariantWrapper::type_info; + +std::shared_ptr>> VariantWrapper::create( + const std::shared_ptr& node, + const AttributeParameters& params) { + if (!is_type(node)) { + return nullptr; + } + + auto fakeQuantize = as_type_ptr(node); + if (!QuantizationDetails::outputLayoutIsSupported(fakeQuantize) || !QuantizationDetails::isSupportedLevel(fakeQuantize->get_levels())) { + return nullptr; + } + + float lowInterval; + float highInterval; + { + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::LPT_LT, "calculateIntervals"); + + FakeQuantizeDequantization dequantization; + { + const auto targetInputs = node->output(0).get_target_inputs(); + if (targetInputs.size() == 1ul) { + dequantization = NetworkHelper::getDequantizationBelow(node, true); + } + } + + const auto outLow = as_type_ptr(node->get_input_node_shared_ptr(3)); + const auto outHigh = as_type_ptr(node->get_input_node_shared_ptr(4)); + if (!NetworkHelper::isScalarLike(outLow) || !NetworkHelper::isScalarLike(outHigh)) { + return nullptr; + } + + if (dequantization.empty()) { + const std::vector lowIntervals = outLow->cast_vector(); + lowInterval = *std::min_element(lowIntervals.begin(), lowIntervals.end()); + + const std::vector highIntervals = outHigh->cast_vector(); + highInterval = *std::max_element(highIntervals.begin(), highIntervals.end()); + } else { + { + auto multiplyResult = dequantization.multiplyConstant == nullptr ? + node->get_input_node_ptr(3)->shared_from_this() : + fold( + foldConvert(node->get_input_node_ptr(3)->shared_from_this(), params.deqPrecision), + dequantization.multiplyConstant); + + auto multiplyResultConstant = as_type_ptr(multiplyResult); + auto intervals = multiplyResultConstant->cast_vector(); + lowInterval = *std::min_element(intervals.begin(), intervals.end()); + } + + { + auto multiplyResult = dequantization.multiplyConstant == nullptr ? 
+ node->get_input_node_ptr(4)->shared_from_this() : + fold( + foldConvert(node->get_input_node_ptr(4)->shared_from_this(), params.deqPrecision), + dequantization.multiplyConstant); + + auto multiplyResultConstant = as_type_ptr(multiplyResult); + auto intervals = multiplyResultConstant->cast_vector(); + highInterval = *std::max_element(intervals.begin(), intervals.end()); + } + } + + if (std::isinf(lowInterval) || std::isinf(highInterval)) { + return nullptr; + } + } + + { + OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::LPT_LT, "create"); + + assert(!std::isinf(lowInterval)); + assert(!std::isinf(highInterval)); + + auto& rtInfo = node->get_rt_info(); + const IntervalsAlignmentSharedValue::Interval interval{ lowInterval, highInterval }; + const auto attribute = std::make_shared<::ngraph::VariantWrapper>( + ngraph::pass::low_precision::make_shared_attribute( + interval, + fakeQuantize->get_levels())); + rtInfo[ngraph::VariantWrapper::type_info.name] = attribute; + + const std::vector outputLowValues = as_type_ptr(fakeQuantize->get_input_node_shared_ptr(3))->cast_vector(); + const std::vector outputHighValues = as_type_ptr(fakeQuantize->get_input_node_shared_ptr(4))->cast_vector(); + LayerTransformation::PrecisionDetails preferablePrecision = LayerTransformation::getPrecisionDetails( + fakeQuantize->get_levels(), + outputLowValues, + outputHighValues); + + if (preferablePrecision.precision != element::undefined) { + attribute->get()->sharedValue->preferablePrecisions.insert(preferablePrecision.precision); + } + +#ifdef LPT_DEBUG + attribute->get()->sharedValue->minLevelsOperation = node->get_friendly_name(); +#endif + + return attribute; + } +} + +void VariantWrapper::merge( + std::vector>>>& attributes) { + std::shared_ptr resultAttribute = get(); + for (const auto& attributeWrapper : attributes) { + auto attribute = attributeWrapper->get(); + + // TODO: LPT: copy/past: merge() + const auto& resultSharedValue = resultAttribute->sharedValue; + const auto& sharedValue = attribute->sharedValue; + if (resultAttribute->levels != attribute->levels) { + // TODO: LPT: not supported right now + resultAttribute->levels = 0ul; + resultSharedValue->minLevels = 0ul; + } + + if (resultSharedValue->combinedInterval.low > sharedValue->combinedInterval.low) { + resultSharedValue->combinedInterval.low = sharedValue->combinedInterval.low; + } + + if (resultSharedValue->combinedInterval.high < sharedValue->combinedInterval.high) { + resultSharedValue->combinedInterval.high = sharedValue->combinedInterval.high; + } + + assert(!std::isinf(resultSharedValue->combinedInterval.low)); + assert(!std::isinf(resultSharedValue->combinedInterval.high)); + + resultSharedValue->preferablePrecisions.insert(sharedValue->preferablePrecisions.begin(), sharedValue->preferablePrecisions.end()); + + const auto resultSize = std::abs(resultSharedValue->minInterval.high - resultSharedValue->minInterval.low); + const auto size = std::abs(sharedValue->minInterval.high - sharedValue->minInterval.low); + if (resultSize > size) { + resultSharedValue->minInterval = sharedValue->minInterval; + + float dequantizationMul; + float dequantizationSub; + float updatedOutputLowValue; + float updatedOutputHighValue; + + const size_t minLevels = NetworkHelper::calculateLevels( + 0.f, + DataPrecision::getMaxValue(resultAttribute->levels), + resultSharedValue->combinedInterval.low, + resultSharedValue->combinedInterval.high, + resultSharedValue->minInterval.low, + resultSharedValue->minInterval.high, + dequantizationMul, + dequantizationSub, + 
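// Two reading aids for the interval math here, inferred from the surrounding
// calls rather than stated by this patch:
// - When a dequantization Multiply follows the FakeQuantize, the output-interval
//   endpoints (inputs 3 and 4) are first folded through the Multiply constant,
//   in spirit fold<opset1::Multiply>(foldConvert(bound, params.deqPrecision),
//   multiplyConstant), the template argument being an assumption, so intervals
//   from differently scaled FakeQuantize ops are compared on one dequantized
//   scale before taking min and max.
// - NetworkHelper::calculateLevels, judging by its arguments, derives the scale
//   and shift that re-express the narrower per-branch (min) interval inside the
//   merged (combined) interval, reports the adjusted output bounds, and returns
//   how many of the original quantization levels the narrow range still spans;
//   fewer surviving levels mean coarser effective quantization after alignment.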
updatedOutputLowValue, + updatedOutputHighValue); + + resultSharedValue->minLevels = minLevels; + +#ifdef LPT_DEBUG + resultSharedValue->minLevelsOperation = sharedValue->minLevelsOperation; +#endif + } + } +} + +std::string VariantWrapper::to_string() { + std::stringstream preferablePrecisions; + preferablePrecisions << "{"; + size_t index = 0; + for (const auto& precision : m_value->sharedValue->preferablePrecisions) { + preferablePrecisions << (index > 0 ? ", " : "") << precision; + ++index; + } + preferablePrecisions << "}"; + + std::stringstream ss; + ss << m_value->get_string(); + ss << "levels: " + std::to_string(m_value->levels) << ", " << + "combined: { " << m_value->sharedValue->combinedInterval.low << ", " << m_value->sharedValue->combinedInterval.high << " }, " << + "min: { " << m_value->sharedValue->minInterval.low << ", " << m_value->sharedValue->minInterval.high << " }, " + "minLevels: " << m_value->sharedValue->minLevels << +#ifdef LPT_DEBUG + ", minLevelsOperation: " << m_value->sharedValue->minLevelsOperation << +#endif + ", preferablePrecisions: " << preferablePrecisions.str(); + return ss.str(); +} diff --git a/inference-engine/src/low_precision_transformations/src/rt_info/per_tensor_quantization_attribute.cpp b/inference-engine/src/low_precision_transformations/src/rt_info/per_tensor_quantization_attribute.cpp new file mode 100644 index 00000000000000..fe418173f2c524 --- /dev/null +++ b/inference-engine/src/low_precision_transformations/src/rt_info/per_tensor_quantization_attribute.cpp @@ -0,0 +1,10 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "low_precision/rt_info/per_tensor_quantization_attribute.hpp" + +using namespace ngraph; + +template class ngraph::VariantImpl; +constexpr VariantTypeInfo VariantWrapper::type_info; \ No newline at end of file diff --git a/inference-engine/src/low_precision_transformations/src/rt_info/precision_preserved_attribute.cpp b/inference-engine/src/low_precision_transformations/src/rt_info/precision_preserved_attribute.cpp new file mode 100644 index 00000000000000..8e8a9b0b62f04e --- /dev/null +++ b/inference-engine/src/low_precision_transformations/src/rt_info/precision_preserved_attribute.cpp @@ -0,0 +1,26 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "low_precision/rt_info/precision_preserved_attribute.hpp" + +#include +#include + +using namespace ngraph; + +PrecisionPreservedAttribute::PrecisionPreservedAttribute(const bool value) { + sharedValue->value = value; +} + +template class ngraph::VariantImpl; + +constexpr VariantTypeInfo VariantWrapper::type_info; + +std::string VariantWrapper::to_string() { + auto& value = this->m_value; + std::stringstream ss; + ss << m_value->get_string(); + ss << "value: " << (value->sharedValue->value ? 
"true" : "false"); + return ss.str(); +} diff --git a/inference-engine/src/low_precision_transformations/src/rt_info/precisions_attribute.cpp b/inference-engine/src/low_precision_transformations/src/rt_info/precisions_attribute.cpp new file mode 100644 index 00000000000000..c69fc1d9b690d2 --- /dev/null +++ b/inference-engine/src/low_precision_transformations/src/rt_info/precisions_attribute.cpp @@ -0,0 +1,80 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "low_precision/rt_info/precisions_attribute.hpp" + +#include +#include +#include +#include +#include + +#include +#include "low_precision/network_helper.hpp" + +using namespace ngraph; + +// order defines default precision +const std::vector PrecisionsAttribute::defaultPrecisions = { ngraph::element::u8, ngraph::element::i8 }; + +PrecisionsAttribute::PrecisionsAttribute(const std::vector& precisions) { + sharedValue->precisions = precisions; +} + +template class ngraph::VariantImpl>; + +constexpr VariantTypeInfo VariantWrapper>::type_info; + +std::shared_ptr>> VariantWrapper>::create( + const std::shared_ptr& node, + const AttributeParameters& params) { + auto attribute = ngraph::pass::low_precision::make_shared_attribute(); + auto wrapper = std::make_shared>>(attribute); + + auto& rt = is_type(node) ? node->output(0).get_rt_info() : node->get_rt_info(); + rt[ngraph::VariantWrapper>::type_info.name] = wrapper; + return wrapper; +} + +void VariantWrapper>::merge( + std::vector>>>& attributes) { + auto& my = this->get()->sharedValue->precisions; + for (auto attribute : attributes) { + const auto& attributeValues = attribute->get()->sharedValue->precisions; + auto it = my.begin(); + while (it != my.end()) { + if (std::find(attributeValues.begin(), attributeValues.end(), *it) == attributeValues.end()) { + it = my.erase(it); + } else { + it++; + } + } + if (my.size() == 0ul) { + break; + } + } +} + +std::shared_ptr VariantWrapper>::init(const std::shared_ptr& node) { + return nullptr; +} + +std::string VariantWrapper>::to_string() { + std::stringstream ss; + + ss << m_value->get_string(); + + bool firstPrecision = true; + ss << "precisions: {"; + for (const auto& value : m_value->sharedValue->precisions) { + if (!firstPrecision) { + ss << ", "; + } + ss << value; + firstPrecision = false; + } + ss << "}"; + + return ss.str(); +} diff --git a/inference-engine/src/low_precision_transformations/src/rt_info/quantization_alignment_attribute.cpp b/inference-engine/src/low_precision_transformations/src/rt_info/quantization_alignment_attribute.cpp new file mode 100644 index 00000000000000..e02c8153b2c0d5 --- /dev/null +++ b/inference-engine/src/low_precision_transformations/src/rt_info/quantization_alignment_attribute.cpp @@ -0,0 +1,90 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "low_precision/rt_info/quantization_alignment_attribute.hpp" + +#include +#include +#include +#include + +#include +#include "low_precision/network_helper.hpp" + +using namespace ngraph; +using namespace ngraph::pass::low_precision; + +QuantizationAlignmentAttribute::QuantizationAlignmentAttribute(const bool hasToBeAligned) { + sharedValue = std::make_shared(hasToBeAligned); +} + +template class ngraph::VariantImpl; + +constexpr VariantTypeInfo VariantWrapper::type_info; + +std::shared_ptr VariantWrapper::init(const std::shared_ptr& node) { + return nullptr; +} + +std::shared_ptr>> VariantWrapper::create( + const std::shared_ptr& node, + const AttributeParameters& 
params) {
+    if (getAttribute<std::shared_ptr<QuantizationAlignmentAttribute>>(node) != nullptr) {
+        return nullptr;
+    }
+
+    if (!NetworkHelper::isPrecisionPreserved(node)) {
+        return nullptr;
+    }
+
+    bool leastOneOperationIsFakeQuantize = false;
+    bool leastOneOperationIsNotFakeQuantize = false;
+    for (auto index = 0ul; index < node->get_input_size(); ++index) {
+        const auto& input = node->input(index);
+        auto inputNode = input.get_source_output().get_node_shared_ptr();
+
+        const auto dequantization = NetworkHelper::getDequantization(node, index);
+        if (!dequantization.empty() &&
+            (is_type<opset1::Convert>(dequantization.data.get_node())) &&
+            is_type<opset1::FakeQuantize>(dequantization.data.get_node()->get_input_node_ptr(0))) {
+            inputNode = dequantization.data.get_node()->get_input_node_shared_ptr(0);
+        }
+
+        if (is_type<opset1::Constant>(inputNode)) {
+            continue;
+        }
+
+        if (!is_type<opset1::FakeQuantize>(inputNode)) {
+            leastOneOperationIsNotFakeQuantize = true;
+            break;
+        }
+
+        leastOneOperationIsFakeQuantize = true;
+    }
+
+    if (leastOneOperationIsFakeQuantize && !leastOneOperationIsNotFakeQuantize) {
+        auto& rt = node->get_rt_info();
+        const auto attribute = std::make_shared<ngraph::VariantWrapper<QuantizationAlignmentAttributePtr>>(
+            make_shared_attribute<QuantizationAlignmentAttribute>());
+        rt[ngraph::VariantWrapper<QuantizationAlignmentAttributePtr>::type_info.name] = attribute;
+        return attribute;
+    }
+
+    return nullptr;
+}
+
+void VariantWrapper<QuantizationAlignmentAttributePtr>::merge(
+    std::vector<std::shared_ptr<ngraph::VariantWrapper<QuantizationAlignmentAttributePtr>>>& attributes) {
+    auto currentAttribute = get();
+    for (const auto& attribute : attributes) {
+        currentAttribute->sharedValue->value = currentAttribute->sharedValue->value || attribute->get()->sharedValue->value;
+    }
+}
+
+std::string VariantWrapper<QuantizationAlignmentAttributePtr>::to_string() {
+    std::stringstream ss;
+    ss << m_value->get_string();
+    ss << "value: " << (m_value->sharedValue->value ? "true" : "false");
+    return ss.str();
+}
diff --git a/inference-engine/src/low_precision_transformations/src/rt_info/shared_value_attribute.cpp b/inference-engine/src/low_precision_transformations/src/rt_info/shared_value_attribute.cpp
new file mode 100644
index 00000000000000..95cc5fa72eae79
--- /dev/null
+++ b/inference-engine/src/low_precision_transformations/src/rt_info/shared_value_attribute.cpp
@@ -0,0 +1,16 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "low_precision/rt_info/shared_value_attribute.hpp"
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include "low_precision/network_helper.hpp"
+
+using namespace ngraph;
diff --git a/inference-engine/src/low_precision_transformations/src/shuffle_channels.cpp b/inference-engine/src/low_precision_transformations/src/shuffle_channels.cpp
index 2ed3e54a86badb..129bcb23977547 100644
--- a/inference-engine/src/low_precision_transformations/src/shuffle_channels.cpp
+++ b/inference-engine/src/low_precision_transformations/src/shuffle_channels.cpp
@@ -8,21 +8,32 @@
 #include
 #include
+#include
+
 #include "low_precision/network_helper.hpp"

 namespace ngraph {
 namespace pass {
 namespace low_precision {

-ShuffleChannelsTransformation::ShuffleChannelsTransformation(const Params& params) : LayerTransformation(params) {}
-void ShuffleChannelsTransformation::registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const {
-    addPattern(
-        pass,
-        context,
-        make_op_pattern<opset1::ShuffleChannels>({ make_op_label<opset1::Multiply>() }));
+NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::ShuffleChannelsTransformation, "ShuffleChannelsTransformation", 0);
+
+ShuffleChannelsTransformation::ShuffleChannelsTransformation(const Params& params) : LayerTransformation(params) {
+    auto matcher = pattern::wrap_type<opset1::ShuffleChannels>({ pattern::wrap_type<opset1::Multiply>() });
+
+    ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) {
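// The merge policies deliberately differ by attribute kind: the precisions
// merge earlier in this patch keeps only precisions common to every incoming
// branch (set intersection, e.g. {u8, i8} merged with {i8} leaves {i8}), while
// the QuantizationAlignment merge just above ORs a boolean flag, so a single
// branch that needs alignment is enough to request it for the whole group.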
auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; + + auto m = std::make_shared(matcher, "ShuffleChannelsTransformation"); + this->register_matcher(m, callback); } -bool ShuffleChannelsTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher& m) const { +bool ShuffleChannelsTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher& m) { if (!canBeTransformed(context, m.get_match_root())) { return false; } diff --git a/inference-engine/src/low_precision_transformations/src/split.cpp b/inference-engine/src/low_precision_transformations/src/split.cpp index 919c6b5e87b185..a663fc64f0a2fa 100644 --- a/inference-engine/src/low_precision_transformations/src/split.cpp +++ b/inference-engine/src/low_precision_transformations/src/split.cpp @@ -4,21 +4,34 @@ #include "low_precision/split.hpp" #include "ngraph/node.hpp" + +#include + #include "low_precision/network_helper.hpp" #include "low_precision/common/dequantization_op.hpp" namespace ngraph { namespace pass { namespace low_precision { -SplitTransformation::SplitTransformation(const Params& params) : LayerTransformation(params) {} -void SplitTransformation::registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const { - addPattern(pass, - context, - make_op_pattern({ make_op_label(), make_op_label() })); +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::SplitTransformation, "SplitTransformation", 0); + +SplitTransformation::SplitTransformation(const Params& params) : LayerTransformation(params) { + auto matcher = pattern::wrap_type({ pattern::wrap_type(), pattern::wrap_type() }); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; + + auto m = std::make_shared(matcher, "SplitTransformation"); + this->register_matcher(m, callback); } -bool SplitTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher& m) const { +bool SplitTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher& m) { if (!canBeTransformed(context, m.get_match_root())) { return false; } @@ -106,19 +119,20 @@ void SplitTransformation::updateOutputs( TransformationContext& context, std::vector> lastNodes, std::shared_ptr originalNode) const { - const size_t outputSize = context.function->get_output_size(); - if (outputSize == 1) { + //TODO: LPT: during refactoring update is not tested + if (lastNodes.size() == 1ul) { updateOutput(context, lastNodes[0], originalNode); } else { const std::string originalName = originalNode->get_friendly_name(); - for (size_t outIdx = 0; outIdx < lastNodes.size(); ++outIdx) { - for (size_t i = 0; i < outputSize; ++i) { - std::shared_ptr result = context.function->get_output_op(i); - std::shared_ptr outputNode = result->get_input_node_shared_ptr(0); - if (outputNode.get() == lastNodes[outIdx].get()) { - originalNode->set_friendly_name(originalName + LayerTransformation::originalLayerPostfix); - lastNodes[outIdx]->set_friendly_name(originalName + "." 
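// (updateOutputs is being rewritten in this hunk: instead of pairing candidate
// nodes with context.function->get_output_op(i) by index, the new version walks
// each candidate's consumers and renames only those that actually feed a Result,
// so the renaming keeps working when the function's output count differs from
// the number of split branches.)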
+ std::to_string(outIdx)); - break; + for (size_t i = 0; i < lastNodes.size(); ++i) { + const auto lastNode = lastNodes[i]; + for (auto output : lastNodes[i]->outputs()) { + for (auto input : output.get_target_inputs()) { + if (is_type(input.get_node())) { + originalNode->set_friendly_name(originalName + LayerTransformation::originalLayerPostfix); + lastNode->set_friendly_name(originalName + "." + std::to_string(i)); + break; + } } } } diff --git a/inference-engine/src/low_precision_transformations/src/squeeze.cpp b/inference-engine/src/low_precision_transformations/src/squeeze.cpp index 4203f8ce4f251c..8ecad0adea489a 100644 --- a/inference-engine/src/low_precision_transformations/src/squeeze.cpp +++ b/inference-engine/src/low_precision_transformations/src/squeeze.cpp @@ -8,23 +8,32 @@ #include #include +#include + #include "low_precision/network_helper.hpp" namespace ngraph { namespace pass { namespace low_precision { +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::SqueezeTransformation, "SqueezeTransformation", 0); + SqueezeTransformation::SqueezeTransformation(const Params& params) : LayerTransformation(params) { -} + auto matcher = pattern::wrap_type({ pattern::wrap_type(), pattern::wrap_type() }); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; -void SqueezeTransformation::registerMatcherIn(GraphRewrite &pass, TransformationContext &context) const { - addPattern( - pass, - context, - make_op_pattern({ make_op_label(), make_op_label() })); + auto m = std::make_shared(matcher, "SqueezeTransformation"); + this->register_matcher(m, callback); } -bool SqueezeTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const { +bool SqueezeTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) { if (!canBeTransformed(context, m.get_match_root())) { return false; } diff --git a/inference-engine/src/low_precision_transformations/src/strided_slice.cpp b/inference-engine/src/low_precision_transformations/src/strided_slice.cpp index ea01d1e8b24715..5e34d1bf45b453 100644 --- a/inference-engine/src/low_precision_transformations/src/strided_slice.cpp +++ b/inference-engine/src/low_precision_transformations/src/strided_slice.cpp @@ -7,12 +7,15 @@ #include #include +#include #include "low_precision/network_helper.hpp" namespace ngraph { namespace pass { namespace low_precision { +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::StridedSliceTransformation, "StridedSliceTransformation", 0); + std::shared_ptr stridedSliceDeqConstant( const std::shared_ptr strSlice, const std::shared_ptr dequantizaitonConstant) { @@ -71,19 +74,22 @@ std::shared_ptr stridedSliceDeqConstant( return NetworkHelper::toScalarIfPossible(result); } -StridedSliceTransformation::StridedSliceTransformation(const Params& params) : LayerTransformation(params) {} +StridedSliceTransformation::StridedSliceTransformation(const Params& params) : LayerTransformation(params) { + auto matcher = ngraph::pattern::wrap_type(); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; -void StridedSliceTransformation::registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const { - addPattern(pass, - context, - make_op_pattern({ - make_op_label(), - make_op_label(), - 
make_op_label(), - make_op_label() })); + auto m = std::make_shared(matcher, "StridedSliceTransformation"); + this->register_matcher(m, callback); } -bool StridedSliceTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher& m) const { +bool StridedSliceTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher& m) { if (!StridedSliceTransformation::canBeTransformed(context, m.get_match_root())) { return false; } diff --git a/inference-engine/src/low_precision_transformations/src/subgraph.cpp b/inference-engine/src/low_precision_transformations/src/subgraph.cpp deleted file mode 100644 index 4fd36f8d7e8b6c..00000000000000 --- a/inference-engine/src/low_precision_transformations/src/subgraph.cpp +++ /dev/null @@ -1,246 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include "low_precision/quantization_details.hpp" -#include "low_precision/common/ie_lpt_exception.hpp" -#include "low_precision/network_helper.hpp" - - -namespace ngraph { -namespace pass { -namespace low_precision { - -bool operationIsSupportedInConcat(const std::shared_ptr& node) { - // list of operations, which change channels, but supported in ConcatTransformation - if (ngraph::is_type(node) || - ngraph::is_type(node) || - ngraph::is_type(node)) { - return true; - } - - // operations, which change channels, usually don't support in ConcatTransformation - const auto inputs = node->input_values(); - for (const auto& input : inputs) { - if (ngraph::is_type(input.get_node())) { - continue; - } - - const PartialShape& in = input.get_partial_shape(); - const PartialShape& out = node->get_output_partial_shape(0); - if (in.rank().is_dynamic() || out.rank().is_dynamic()) { - return false; - } - - const auto inRank = in.rank().get_length(); - const auto outRank = out.rank().get_length(); - if (inRank < 2 || outRank < 2) { - return false; - } - - for (int i = 0; i < 2; ++i) { - if ((i >= inRank) || (i >= outRank)) { - // all previous dimensions are equal - return true; - } - if (in[i] != out[i]) { - return false; - } - } - } - - return true; -} - -Subgraph::Subgraph(ngraph::pass::ILayerTransformationsManager* layerTransformationsManager) : layerTransformationsManager(layerTransformationsManager) { -} - -bool Subgraph::fillSubgraphForQuantization( - const std::shared_ptr& fakeQuantize, - std::unordered_set& handledLayers) { - quantizationLayers.push_back(fakeQuantize); - handledLayers.insert(fakeQuantize->get_friendly_name()); - layers.emplace(fakeQuantize->get_friendly_name(), fakeQuantize); - - for (size_t index = 0; index < fakeQuantize->get_output_size(); ++index) { - const auto childInputs = fakeQuantize->get_output_target_inputs(index); - for (const auto childInput : childInputs) { - const std::shared_ptr child = childInput.get_node()->shared_from_this(); - if (handledLayers.find(child->get_friendly_name()) != handledLayers.end()) { - continue; - } - - const std::shared_ptr concatChild = ngraph::as_type_ptr(child); - if (concatChild != nullptr) { - if (!fillSubgraphForConcat(concatChild, handledLayers)) { - return false; - } - } else { - const std::shared_ptr fakeQuantizeChild = ngraph::as_type_ptr(child); - if (fakeQuantizeChild != nullptr) { - // - } else { - if (layerTransformationsManager->isPrecisionPreserved(child) && operationIsSupportedInConcat(child)) { - if (!fillSubgraphForIntermediate(child, handledLayers)) { - return false; 
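// (operationIsSupportedInConcat, defined above, accepts a shape-changing
// operation only while the batch and channel dimensions survive: for example,
// Reshape [1, 64, 8, 8] -> [1, 64, 64] keeps dimensions 0 and 1 and passes,
// whereas Reshape [1, 64, 8, 8] -> [1, 4096] alters the channel dimension and
// is rejected, taking that branch out of the would-be low-precision subgraph.)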
- } - } - } - } - } - } - - return true; -} - -bool Subgraph::atLeastOneIsIntermediate(const std::shared_ptr& node) const { - for (size_t index = 0; index < node->get_output_size(); ++index) { - const auto childInputs = node->get_output_target_inputs(index); - for (const auto childInput : childInputs) { - auto child = childInput.get_node()->shared_from_this(); - if (as_type_ptr(child)) { - return true; - } - - if (!layerTransformationsManager->isPrecisionPreserved(child) || !operationIsSupportedInConcat(child)) { - // child branch is out of subgraph - continue; - } - - if (atLeastOneIsIntermediate(child)) { - return true; - } - } - } - return false; -} - -std::shared_ptr getFakeQuantize(const FakeQuantizeDequantization& dequantization) { - std::shared_ptr node = dequantization.data.get_node_shared_ptr(); - std::shared_ptr fakeQuantize = ngraph::as_type_ptr(node); - if (fakeQuantize != nullptr) { - return fakeQuantize; - } - - if (is_type(node)) { - fakeQuantize = ngraph::as_type_ptr(node->get_input_node_shared_ptr(0)); - } - return fakeQuantize; -} - -bool Subgraph::fill(const std::shared_ptr& layer, std::unordered_set& handledLayers) { - // if at least one parent is handled incorrectly then subgraph is not in low precision - for (size_t index = 0; index < layer->get_input_size(); ++index) { - const std::shared_ptr parent = layer->get_input_node_shared_ptr(index); - if (handledLayers.find(parent->get_friendly_name()) != handledLayers.end()) { - continue; - } - - const std::shared_ptr concatParent = ngraph::as_type_ptr(parent); - if (concatParent != nullptr) { - if (!fillSubgraphForConcat(concatParent, handledLayers)) { - return false; - } - } else { - const FakeQuantizeDequantization dequantization = NetworkHelper::getDequantization(parent, 0, true); - const std::shared_ptr fakeQuantizeParent = dequantization.empty() ? 
- ngraph::as_type_ptr(parent) : - getFakeQuantize(dequantization); - if (fakeQuantizeParent != nullptr) { - if (!fillSubgraphForQuantization(fakeQuantizeParent, handledLayers)) { - // - } - } else { - const std::shared_ptr constant = ngraph::as_type_ptr(parent); - if (constant != nullptr) { - // - } else { - if (layerTransformationsManager->isPrecisionPreserved(parent) && operationIsSupportedInConcat(parent)) { - if (!fillSubgraphForIntermediate(parent, handledLayers)) { - return false; - } - } else { - return false; - } - } - } - } - } - - // TODO: if at least one child was handled correctly then subgraph is low precision - for (size_t index = 0; index < layer->get_output_size(); ++index) { - const auto childInputs = layer->get_output_target_inputs(index); - for (const auto childInput : childInputs) { - const std::shared_ptr child = childInput.get_node()->shared_from_this(); - - if (handledLayers.find(child->get_friendly_name()) != handledLayers.end()) { - continue; - } - - const std::shared_ptr concatChild = ngraph::as_type_ptr(child); - if (concatChild != nullptr) { - if (!fillSubgraphForConcat(concatChild, handledLayers)) { - return false; - } - } else { - // check if children branches between Concat operations - if (!atLeastOneIsIntermediate(child)) { - continue; - } - - const std::shared_ptr fakeQuantizeChild = ngraph::as_type_ptr(child); - if (fakeQuantizeChild != nullptr) { - // - } else if (layerTransformationsManager->isPrecisionPreserved(child) && operationIsSupportedInConcat(child)) { - if (!fillSubgraphForIntermediate(child, handledLayers)) { - return false; - } - } - } - } - } - - return true; -} - -bool Subgraph::fillSubgraphForIntermediate(const std::shared_ptr& intermediate, std::unordered_set& handledLayers) { - handledLayers.insert(intermediate->get_friendly_name()); - layers.emplace(intermediate->get_friendly_name(), intermediate); - - return fill(intermediate, handledLayers); -} - -bool Subgraph::empty() const { - return quantizationLayers.empty(); -} - -bool Subgraph::fillSubgraphForConcat(const std::shared_ptr& concat, std::unordered_set& handledLayers) { - const auto axis = concat->get_axis(); - const size_t normalizedAxis = ngraph::normalize_axis(concat->get_friendly_name(), axis, concat->get_output_partial_shape(0).rank()); - // supported only per-channel concat - if (normalizedAxis != 1ul) { - return false; - } - - concatLayers.push_back(concat); - handledLayers.insert(concat->get_friendly_name()); - layers.emplace(concat->get_friendly_name(), concat); - - std::shared_ptr node = concat; - return fill(node, handledLayers); -} - -} // namespace low_precision -} // namespace pass -} // namespace ngraph diff --git a/inference-engine/src/low_precision_transformations/src/subtract.cpp b/inference-engine/src/low_precision_transformations/src/subtract.cpp index 2f86bfc97c7931..4c71e191c2f6e2 100644 --- a/inference-engine/src/low_precision_transformations/src/subtract.cpp +++ b/inference-engine/src/low_precision_transformations/src/subtract.cpp @@ -11,6 +11,9 @@ #include #include +#include +#include + #include "low_precision/common/ie_lpt_exception.hpp" #include "low_precision/network_helper.hpp" @@ -18,19 +21,27 @@ namespace ngraph { namespace pass { namespace low_precision { -void SubtractTransformation::registerMatcherIn(GraphRewrite &pass, TransformationContext &context) const { - addPattern( - pass, - context, - make_op_pattern({ make_op_label(), make_op_label() })); +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::SubtractTransformation, 
"SubtractTransformation", 0); + +SubtractTransformation::SubtractTransformation(const Params& params) : LayerTransformation(params) { + auto convert = pattern::wrap_type(); + auto multiply = pattern::wrap_type(); + auto subParent = std::make_shared(OutputVector{ convert, multiply }); + auto subtract = pattern::wrap_type({ subParent, pattern::wrap_type() }); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; - addPattern( - pass, - context, - make_op_pattern({ make_op_label(), make_op_label() })); + auto m = std::make_shared(subtract, "SubtractTransformation"); + this->register_matcher(m, callback); } -bool SubtractTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const { +bool SubtractTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) { std::shared_ptr subtract = as_type_ptr(m.get_match_root()); if (!canBeTransformed(context, subtract)) { return false; diff --git a/inference-engine/src/low_precision_transformations/src/subtract_multiply_to_multiply_add.cpp b/inference-engine/src/low_precision_transformations/src/subtract_multiply_to_multiply_add.cpp index f79021f93b8bae..f8554db8721ed9 100644 --- a/inference-engine/src/low_precision_transformations/src/subtract_multiply_to_multiply_add.cpp +++ b/inference-engine/src/low_precision_transformations/src/subtract_multiply_to_multiply_add.cpp @@ -8,6 +8,7 @@ #include #include +#include #include "low_precision/common/ie_lpt_exception.hpp" #include "low_precision/network_helper.hpp" #include "low_precision/common/dequantization_op.hpp" @@ -16,8 +17,21 @@ namespace ngraph { namespace pass { namespace low_precision { -void SubtractMultiplyToMultiplyAddTransformation::registerMatcherIn(GraphRewrite &pass, TransformationContext &context) const { - addSingleNodePattern(pass, context); +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::SubtractMultiplyToMultiplyAddTransformation, "SubtractMultiplyToMultiplyAddTransformation", 0); + +SubtractMultiplyToMultiplyAddTransformation::SubtractMultiplyToMultiplyAddTransformation(const Params& params) : LayerTransformation(params) { + auto matcher = pattern::wrap_type(); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; + + auto m = std::make_shared(matcher, "SubtractMultiplyToMultiplyAddTransformation"); + this->register_matcher(m, callback); } FakeQuantizeDequantization get(const std::shared_ptr node) { @@ -52,7 +66,7 @@ FakeQuantizeDequantization get(const std::shared_ptr node) { return FakeQuantizeDequantization(dataNode, convert, subtract, subtractConvert, subtractConstant, multiply, multiplyConstant); } -bool SubtractMultiplyToMultiplyAddTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const { +bool SubtractMultiplyToMultiplyAddTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) { auto multiply = m.get_match_root(); if (!canBeTransformed(context, multiply)) { return false; diff --git a/inference-engine/src/low_precision_transformations/src/transformation_context.cpp b/inference-engine/src/low_precision_transformations/src/transformation_context.cpp index 22d8d3444682de..d5d21c7ecfcc9a 100644 --- 
a/inference-engine/src/low_precision_transformations/src/transformation_context.cpp +++ b/inference-engine/src/low_precision_transformations/src/transformation_context.cpp @@ -8,6 +8,8 @@ namespace ngraph { namespace pass { namespace low_precision { +TransformationContext::TransformationContext() : function(nullptr) {} + TransformationContext::TransformationContext(std::shared_ptr function) : function(function) { } diff --git a/inference-engine/src/low_precision_transformations/src/transformer.cpp b/inference-engine/src/low_precision_transformations/src/transformer.cpp deleted file mode 100644 index 6018c6f820f67b..00000000000000 --- a/inference-engine/src/low_precision_transformations/src/transformer.cpp +++ /dev/null @@ -1,504 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "low_precision/transformer.hpp" -#include "low_precision/network_helper.hpp" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "ngraph_ops/type_relaxed.hpp" -#include "ngraph/pass/constant_folding.hpp" -#include "ngraph/opsets/opset6.hpp" - -#include "lpt_itt.h" - -// branch specific transformations -#include "low_precision/concat.hpp" -#include "low_precision/concat_multi_channels.hpp" - -// decomposition transformations -#include "low_precision/fake_quantize_decomposition.hpp" - -// general transformations -#include "low_precision/add.hpp" -#include "low_precision/avg_pool.hpp" -#include "low_precision/clamp.hpp" -#include "low_precision/convolution.hpp" -#include "low_precision/convolution_backprop_data.hpp" -#include "low_precision/depth_to_space.hpp" -#include "low_precision/fake_quantize.hpp" -#include "low_precision/group_convolution.hpp" -#include "low_precision/interpolate.hpp" -#include "low_precision/mat_mul.hpp" -#include "low_precision/max_pool.hpp" -#include "low_precision/multiply.hpp" -#include "low_precision/mvn.hpp" -#include "low_precision/normalize_l2.hpp" -#include "low_precision/prelu.hpp" -#include "low_precision/reduce_max.hpp" -#include "low_precision/reduce_mean.hpp" -#include "low_precision/reduce_min.hpp" -#include "low_precision/reduce_sum.hpp" -#include "low_precision/reshape.hpp" -#include "low_precision/relu.hpp" -#include "low_precision/shuffle_channels.hpp" -#include "low_precision/squeeze.hpp" -#include "low_precision/subtract.hpp" -#include "low_precision/split.hpp" -#include "low_precision/strided_slice.hpp" -#include "low_precision/transpose.hpp" -#include "low_precision/unsqueeze.hpp" -#include "low_precision/variadic_split.hpp" -#include "low_precision/split.hpp" - -// cleanup transformations -#include "low_precision/fuse_convert.hpp" -#include "low_precision/fold_convert.hpp" -#include "low_precision/fuse_fake_quantize.hpp" -#include "low_precision/fuse_subtract_to_fake_quantize.hpp" -#include "low_precision/fuse_multiply_to_fake_quantize.hpp" -#include "low_precision/multiply_to_group_convolution.hpp" -#include "low_precision/subtract_multiply_to_multiply_add.hpp" - -namespace ngraph { -namespace pass { -namespace low_precision { - -LowPrecisionTransformations::LowPrecisionTransformations( - const std::map& branchSpecificTransformations, - const std::map& decompositionTransformations, - const std::map& transformations, - const std::map>>& cleanupTransformations, - const std::vector& standaloneCleanupTransformations) : - branchSpecificTransformations(branchSpecificTransformations), - decompositionTransformations(decompositionTransformations), - 
transformations(transformations), - cleanupTransformations(cleanupTransformations), - standaloneCleanupTransformations(standaloneCleanupTransformations) {} - -void LowPrecisionTransformations::setUpdatePrecisions(const bool updatePrecisions) { - for (auto it = branchSpecificTransformations.begin(); it != branchSpecificTransformations.end(); ++it) { - it->second->setUpdatePrecisions(updatePrecisions); - } - for (auto it = transformations.begin(); it != transformations.end(); ++it) { - it->second->setUpdatePrecisions(updatePrecisions); - } -} - -void LowPrecisionTransformations::setQuantizedTensorAlignmentOnActivations( - const LayerTransformation::QuantizedTensorAlignment quantizedTensorAlignmentOnActivations) { - for (auto it = branchSpecificTransformations.begin(); it != branchSpecificTransformations.end(); ++it) { - it->second->setQuantizedTensorAlignmentOnActivations(quantizedTensorAlignmentOnActivations); - } - for (auto it = transformations.begin(); it != transformations.end(); ++it) { - it->second->setQuantizedTensorAlignmentOnActivations(quantizedTensorAlignmentOnActivations); - } -} - -void LowPrecisionTransformations::setQuantizedTensorAlignmentOnWeights( - const LayerTransformation::QuantizedTensorAlignment quantizedTensorAlignmentOnWeights) { - for (auto it = branchSpecificTransformations.begin(); it != branchSpecificTransformations.end(); ++it) { - it->second->setQuantizedTensorAlignmentOnWeights(quantizedTensorAlignmentOnWeights); - } - for (auto it = transformations.begin(); it != transformations.end(); ++it) { - it->second->setQuantizedTensorAlignmentOnWeights(quantizedTensorAlignmentOnWeights); - } -} - -std::vector LowPrecisionTransformations::find(const std::string& transformationKey) const { - auto it = branchSpecificTransformations.find(transformationKey); - std::vector res; - if (it != branchSpecificTransformations.end()) { - res.emplace_back(it->second); - } - - it = transformations.find(transformationKey); - if (it != transformations.end()) { - res.emplace_back(it->second); - } - - const auto it1 = cleanupTransformations.find(transformationKey); - if (it1 != cleanupTransformations.end()) { - for (const auto& transformation : it1->second) { - res.emplace_back(transformation.second); - } - } - - for (const auto& transformation : standaloneCleanupTransformations) { - if (transformation.typeName == transformationKey) { - res.emplace_back(transformation.transformation); - } - } - - return res; -} - -void LowPrecisionTransformations::setParamsManager(IParamsManager* paramsManager) noexcept { - setParamsManager(paramsManager, branchSpecificTransformations); - setParamsManager(paramsManager, decompositionTransformations); - setParamsManager(paramsManager, transformations); - setParamsManager(paramsManager, cleanupTransformations); - setParamsManager(paramsManager, standaloneCleanupTransformations); -} - -void LowPrecisionTransformations::setLayerTransformationsManager(ILayerTransformationsManager* layerTransformationsManager) noexcept { - setLayerTransformationsManager(layerTransformationsManager, branchSpecificTransformations); - setLayerTransformationsManager(layerTransformationsManager, decompositionTransformations); - setLayerTransformationsManager(layerTransformationsManager, transformations); - setLayerTransformationsManager(layerTransformationsManager, cleanupTransformations); - setLayerTransformationsManager(layerTransformationsManager, standaloneCleanupTransformations); -} - -void LowPrecisionTransformations::setParamsManager( - IParamsManager* paramsManager, - 
std::map& transformations) noexcept { - for (auto it : transformations) { - it.second->setParamsManager(paramsManager); - } -} - -void LowPrecisionTransformations::setParamsManager( - IParamsManager* paramsManager, - std::map>>& transformations) noexcept { - for (auto it : transformations) { - for (auto transform : it.second) { - transform.second->setParamsManager(paramsManager); - } - } -} - -void LowPrecisionTransformations::setParamsManager( - IParamsManager* paramsManager, - std::vector& transformations) noexcept { - for (auto it : transformations) { - it.transformation->setParamsManager(paramsManager); - } -} - -void LowPrecisionTransformations::setLayerTransformationsManager( - ILayerTransformationsManager* layerTransformationsManager, - std::map& transformations) noexcept { - for (auto it : transformations) { - it.second->setLayerTransformationsManager(layerTransformationsManager); - } -} - -void LowPrecisionTransformations::setLayerTransformationsManager( - ILayerTransformationsManager* layerTransformationsManager, - std::map < std::string, std::vector < std::pair> > & transformations) noexcept { - for (auto it : transformations) { - for (auto transform : it.second) { - transform.second->setLayerTransformationsManager(layerTransformationsManager); - } - } -} - -void LowPrecisionTransformations::setLayerTransformationsManager( - ILayerTransformationsManager* layerTransformationsManager, - std::vector& transformations) noexcept { - for (auto it : transformations) { - it.transformation->setLayerTransformationsManager(layerTransformationsManager); - } -} - -LowPrecisionTransformations LowPrecisionTransformer::getAllTransformations(const LayerTransformation::Params& params) { - using namespace pass::low_precision; - - auto transformer = LowPrecisionTransformations(). - addBranchSpecific(params). - - addDecomposition(params). - - add(params). - add(params). - add(params). - add(params). - add(params). - add(params). - add(params). - add(params). - add(params). - add(params). - add(params). - add(params). - add(params). - add(params). - add(params). - add(params). - add(params). - add(params). - add(params). - add(params). - add(params). - add(params). - add(params). - add(params). - add(params). - add(params). - add(params). - add(params). - add(params). - add(params). - - addCleanup(params). - addCleanup(params). - - addStandaloneCleanup(params). - addStandaloneCleanup(params). - addStandaloneCleanup(params). 
- addStandaloneCleanup(params); - - return transformer; -} - -bool LowPrecisionTransformer::isFunctionQuantized(const std::shared_ptr& function) { - std::set> handledNodes; - std::deque> nodes; - for (auto result : function->get_results()) { - nodes.push_front(result); - } - - while (!nodes.empty()) { - auto node = nodes.front(); - nodes.pop_front(); - - for (size_t i = 0; i < node->inputs().size(); ++i) { - auto parent = node->get_input_node_shared_ptr(i); - if (handledNodes.find(parent) != handledNodes.end()) { - continue; - } - - const std::shared_ptr fakeQuantize = as_type_ptr(parent); - if ((fakeQuantize != nullptr) && - QuantizationDetails::outputLayoutIsSupported(fakeQuantize) && - QuantizationDetails::isSupportedLevel(fakeQuantize->get_levels())) { - return true; - } - - nodes.push_front(parent); - handledNodes.insert(parent); - } - } - return false; -} - -LowPrecisionTransformer::LowPrecisionTransformer(): transformations(LowPrecisionTransformer::getAllTransformations()) {} - -template -void make_matcher_type_relaxed(ngraph::pass::GraphRewrite* transformation) { - using namespace ngraph; - - auto is_op_type = [](std::shared_ptr n) { - return !!as_type_ptr(n); - }; - - auto p_node = std::make_shared(element::f32, Shape{}, is_op_type); - - ngraph::graph_rewrite_callback callback = [](ngraph::pattern::Matcher &m) { - auto l_node = std::dynamic_pointer_cast(m.get_match_root()); - if (std::dynamic_pointer_cast(l_node)) { - return false; - } - if (!l_node) { - THROW_IE_LPT_EXCEPTION(*l_node) << "unexpected operation type"; - } - - std::vector inputPrecisions; - for (auto& inputs : l_node->inputs()) { - inputPrecisions.push_back(inputs.get_element_type()); - } - - std::vector outputPrecisions; - for (auto& output : l_node->outputs()) { - outputPrecisions.push_back(output.get_element_type()); - } - - auto replacement = std::make_shared>(*l_node, inputPrecisions, outputPrecisions); - - copy_runtime_info(l_node, replacement); - replace_node(l_node, replacement); - return true; - }; - - auto m = std::make_shared(p_node, "TypeRelaxedReplacer"); - NGRAPH_SUPPRESS_DEPRECATED_START - transformation->add_matcher(m, callback, ngraph::pass::PassProperty::CHANGE_DYNAMIC_STATE); - NGRAPH_SUPPRESS_DEPRECATED_END -} - -TypeRelaxedReplacer::TypeRelaxedReplacer() { - make_matcher_type_relaxed(this); - make_matcher_type_relaxed(this); - make_matcher_type_relaxed(this); - make_matcher_type_relaxed(this); - make_matcher_type_relaxed(this); - make_matcher_type_relaxed(this); - make_matcher_type_relaxed(this); - make_matcher_type_relaxed(this); - make_matcher_type_relaxed(this); - make_matcher_type_relaxed(this); - make_matcher_type_relaxed(this); - make_matcher_type_relaxed(this); - make_matcher_type_relaxed(this); - make_matcher_type_relaxed(this); - make_matcher_type_relaxed(this); - make_matcher_type_relaxed(this); - make_matcher_type_relaxed(this); - make_matcher_type_relaxed(this); - make_matcher_type_relaxed(this); -} - -LowPrecisionTransformer::LowPrecisionTransformer(const LowPrecisionTransformations& transformations) - : transformations(transformations) {} - -void LowPrecisionTransformer::transform(std::shared_ptr network) { - if (!isFunctionQuantized(network)) { - return; - } - - OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, itt::domains::LPT_LT, "LowPrecisionTransformer", "transform"); - - ngraph::pass::ConstantFolding constantFolding; - constantFolding.run_on_function(network); - - transformations.setParamsManager(this); - transformations.setLayerTransformationsManager(this); - - 
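
The isFunctionQuantized walk above traverses the graph in reverse, from the Results towards the Parameters, and returns early on the first supported FakeQuantize; it survives this refactor as a static method on the new LowPrecision pass (see the mkldnn_graph.cpp hunks below). A caller-side sketch of the gating pattern, assuming the post-refactor header is low_precision/low_precision.hpp and that the pass is registrable with default restrictions:

    #include <low_precision/low_precision.hpp>

    // Gate the (expensive) LPT pipeline on the cheap graph scan, mirroring how
    // the CPU plugin uses the check in mkldnn_plugin.cpp further below.
    void runLptIfQuantized(const std::shared_ptr<ngraph::Function>& f) {
        if (!ngraph::pass::low_precision::LowPrecision::isFunctionQuantized(f)) {
            return;  // no supported FakeQuantize reachable from any Result
        }
        ngraph::pass::Manager manager;
        // Precision restrictions omitted; see the mkldnn_plugin.cpp hunk below.
        manager.register_pass<ngraph::pass::low_precision::LowPrecision>();
        manager.run_passes(f);
    }
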
TransformationContext context(network); - - OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "TypeRelaxedReplacer"); - - // Extend necessary operations with polymorphic semantics - { - TypeRelaxedReplacer pass; - pass.run_on_function(network); - } - - OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "BranchSpecificTransformations"); - - { - // Branch specific transformations - GraphRewrite pass; - registerAllMatchers(transformations.branchSpecificTransformations, pass, context); - pass.run_on_function(network); - } - - OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FakeQuantizeDecomposition"); - - { - // Step #1: FakeQuantize decomposition transformation execution - GraphRewrite pass; - registerAllMatchers(transformations.decompositionTransformations, pass, context); - pass.run_on_function(network); - } - - OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "LayerTransformations"); - - { - // Step #2: layer transformations execution - GraphRewrite pass; - registerAllMatchers(transformations.transformations, pass, context); - pass.run_on_function(network); - } - - OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "CleanupTransformations"); - - { - // Step #3: cleanup transformations execution - GraphRewrite pass; - registerAllMatchers(transformations.cleanupTransformations, pass, context); - pass.run_on_function(network); - } - - OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "StandaloneCleanupTransformations"); - - { - // Step #4: standalone cleanup transformations execution - - for (auto it : transformations.standaloneCleanupTransformations) { - GraphRewrite pass; - it.transformation->registerMatcherIn(pass, context); - pass.run_on_function(network); - } - } - - network->validate_nodes_and_infer_types(); -} - -std::vector LowPrecisionTransformer::getPrecisionsOnActivations(const Node& op) const noexcept { - const std::string operantionType = LowPrecisionTransformations::getType(op); - const std::vector transformation = transformations.find(operantionType); - if (transformation.empty()) { - return std::vector(); - } - std::vector precisions = transformation[0]->getPrecisionsOnActivations(); - - for (const auto& transform : transformation) { - precisions = NetworkHelper::precisionIntersection(precisions, transform->getPrecisionsOnActivations()); - } - return precisions; -} - -bool LowPrecisionTransformer::isQuantized(const std::shared_ptr& layer) const noexcept { - const std::string operantionType = LowPrecisionTransformations::getType(*layer); - const std::vector transformation = transformations.find(operantionType); - if (transformation.empty()) { - return false; - } - - for (const auto& transform : transformation) { - if (!transform->isQuantized(layer)) { - return false; - } - } - return true; -} - -bool LowPrecisionTransformer::isPrecisionPreserved(const std::shared_ptr& layer) const noexcept { - const std::string operantionType = LowPrecisionTransformations::getType(*layer); - const std::vector transformation = transformations.find(operantionType); - if (transformation.empty()) { - return false; - } - - for (const auto& transform : transformation) { - if (!transform->isPrecisionPreserved(layer)) { - return false; - } - } - return true; -} - -void LowPrecisionTransformer::registerAllMatchers( - std::map transformations, - GraphRewrite& pass, - TransformationContext& context) { - for (auto it : transformations) { - it.second->registerMatcherIn(pass, context); - } -} - -void LowPrecisionTransformer::registerAllMatchers( - std::map>> transformations, - GraphRewrite& pass, - TransformationContext& context) 
{ - for (auto it : transformations) { - for (auto transform : it.second) { - transform.second->registerMatcherIn(pass, context); - } - } -} - -} // namespace low_precision -} // namespace pass -} // namespace ngraph diff --git a/inference-engine/src/low_precision_transformations/src/transparent_base_transformation.cpp b/inference-engine/src/low_precision_transformations/src/transparent_base_transformation.cpp index b8c75d43619b49..c89ca0e9144c67 100644 --- a/inference-engine/src/low_precision_transformations/src/transparent_base_transformation.cpp +++ b/inference-engine/src/low_precision_transformations/src/transparent_base_transformation.cpp @@ -15,7 +15,7 @@ using namespace ngraph; using namespace ngraph::pass; using namespace ngraph::pass::low_precision; -bool TransparentBaseTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const { +bool TransparentBaseTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) { auto operation = m.get_match_root(); const std::shared_ptr dequantization = operation->input_value(0).get_node_shared_ptr(); // const std::shared_ptr dequantizationParent = dequantization->input_value(0).get_node_shared_ptr(); diff --git a/inference-engine/src/low_precision_transformations/src/transpose.cpp b/inference-engine/src/low_precision_transformations/src/transpose.cpp index de3cd40e0d5257..66f29a66ec88f9 100644 --- a/inference-engine/src/low_precision_transformations/src/transpose.cpp +++ b/inference-engine/src/low_precision_transformations/src/transpose.cpp @@ -7,6 +7,8 @@ #include #include +#include + #include "low_precision/common/ie_lpt_exception.hpp" #include "low_precision/network_helper.hpp" @@ -14,11 +16,21 @@ namespace ngraph { namespace pass { namespace low_precision { -void TransposeTransformation::registerMatcherIn(GraphRewrite &pass, TransformationContext &context) const { - addPattern( - pass, - context, - make_op_pattern({ make_op_label(), make_op_label() })); +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::TransposeTransformation, "TransposeTransformation", 0); + +TransposeTransformation::TransposeTransformation(const Params& params) : LayerTransformation(params) { + auto matcher = pattern::wrap_type({ pattern::wrap_type(), pattern::wrap_type() }); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; + + auto m = std::make_shared(matcher, "TransposeTransformation"); + this->register_matcher(m, callback); } void transposeDequantizationConstant(std::shared_ptr& transpose) { @@ -74,7 +86,7 @@ void transposeDequantizationConstant(std::shared_ptr& transpose) { } } -bool TransposeTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const { +bool TransposeTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) { std::shared_ptr transpose = m.get_match_root(); if (!canBeTransformed(context, transpose)) { return false; diff --git a/inference-engine/src/low_precision_transformations/src/unsqueeze.cpp b/inference-engine/src/low_precision_transformations/src/unsqueeze.cpp index b53341005d477a..b03046e2253357 100644 --- a/inference-engine/src/low_precision_transformations/src/unsqueeze.cpp +++ b/inference-engine/src/low_precision_transformations/src/unsqueeze.cpp @@ -8,23 +8,32 @@ #include #include +#include + #include "low_precision/network_helper.hpp" namespace ngraph { namespace pass 
{ namespace low_precision { +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::UnsqueezeTransformation, "UnsqueezeTransformation", 0); + UnsqueezeTransformation::UnsqueezeTransformation(const Params& params) : LayerTransformation(params) { -} + auto matcher = pattern::wrap_type({ pattern::wrap_type(), pattern::wrap_type() }); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; -void UnsqueezeTransformation::registerMatcherIn(GraphRewrite &pass, TransformationContext &context) const { - addPattern( - pass, - context, - make_op_pattern({ make_op_label(), make_op_label() })); + auto m = std::make_shared(matcher, "UnsqueezeTransformation"); + this->register_matcher(m, callback); } -bool UnsqueezeTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) const { +bool UnsqueezeTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) { if (!canBeTransformed(context, m.get_match_root())) { return false; } diff --git a/inference-engine/src/low_precision_transformations/src/variadic_split.cpp b/inference-engine/src/low_precision_transformations/src/variadic_split.cpp index 685219f27730d0..8cc9ba7caaadea 100644 --- a/inference-engine/src/low_precision_transformations/src/variadic_split.cpp +++ b/inference-engine/src/low_precision_transformations/src/variadic_split.cpp @@ -4,20 +4,33 @@ #include "low_precision/variadic_split.hpp" #include "ngraph/node.hpp" + +#include + #include "low_precision/network_helper.hpp" namespace ngraph { namespace pass { namespace low_precision { -VariadicSplitTransformation::VariadicSplitTransformation(const Params& params) : SplitTransformation(params) {} - -void VariadicSplitTransformation::registerMatcherIn(GraphRewrite& pass, TransformationContext& context) const { - addPattern(pass, - context, - make_op_pattern({ - make_op_label(), - make_op_label(), - make_op_label() })); + +NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::VariadicSplitTransformation, "VariadicSplitTransformation", 0); + +VariadicSplitTransformation::VariadicSplitTransformation(const Params& params) : SplitTransformation(params) { + auto matcher = pattern::wrap_type({ + pattern::wrap_type(), + pattern::wrap_type(), + pattern::wrap_type() }); + + ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { + auto op = m.get_match_root(); + if (transformation_callback(op)) { + return false; + } + return transform(*context, m); + }; + + auto m = std::make_shared(matcher, "VariadicSplitTransformation"); + this->register_matcher(m, callback); } } // namespace low_precision diff --git a/inference-engine/src/low_precision_transformations/src/weightable_layer_transformation.cpp b/inference-engine/src/low_precision_transformations/src/weightable_layer_transformation.cpp index c760f9a7bace13..402327f277ad74 100644 --- a/inference-engine/src/low_precision_transformations/src/weightable_layer_transformation.cpp +++ b/inference-engine/src/low_precision_transformations/src/weightable_layer_transformation.cpp @@ -42,9 +42,6 @@ bool WeightableLayerTransformation::canConvolutionBeTransformed(const Transforma if (dequantization.empty()) { const auto fqOnWeights = getFakeQuantizeOnWeights(layer); const auto dataPrecision = getDataPrecisionOnWeights(layer); - if ((!supportAsymmetricQuantization) && dataPrecision.hasZeroPoint) { - return false; - } if (!NetworkHelper::checkZeroPoint(fqOnWeights, 
dataPrecision)) { return false; } @@ -218,7 +215,7 @@ bool WeightableLayerTransformation::canBeTransformed(const TransformationContext return true; } -bool WeightableLayerTransformation::isQuantized(std::shared_ptr layer, bool reshapeIsRequired) const noexcept { +bool WeightableLayerTransformation::isQuantizedStatic(const std::shared_ptr& layer, const bool reshapeIsRequired) noexcept { FakeQuantizeDequantization dequantizationOnWeights; if (reshapeIsRequired) { const auto reshape = layer->get_input_node_shared_ptr(1); @@ -236,7 +233,9 @@ bool WeightableLayerTransformation::isQuantized(std::shared_ptr layer, boo const std::shared_ptr fq = as_type_ptr(layer->get_input_node_shared_ptr(1)); return NetworkHelper::isQuantizeSupported(fq); } else { - dequantizationOnWeights = NetworkHelper::getDequantization(layer, 1); + // TODO: update NetworkHelper API later + const std::shared_ptr op = const_cast(layer.get())->shared_from_this(); + dequantizationOnWeights = NetworkHelper::getDequantization(op, 1); } if (dequantizationOnWeights.empty()) { @@ -283,14 +282,21 @@ bool WeightableLayerTransformation::isPrecisionPreserved(std::shared_ptr l return false; } -void WeightableLayerTransformation::decomposeFakeQuantizeForWeightsPath(const std::shared_ptr& node, const size_t outChannelsShapeIndex) const { +bool WeightableLayerTransformation::decomposeFakeQuantizeForWeightsPath(const std::shared_ptr& node, const size_t outChannelsShapeIndex) const { const auto fq = getFakeQuantizeOnWeights(node); if (fq == nullptr) { - return; + // FakeQuantize has been decomposed already + return true; } const QuantizationDetails quantizationDetails = QuantizationDetails::getDetails(fq); - const DataPrecision dataPrecision = getDataPrecision(fq, quantizationDetails, true); + const auto precisionsAttribute = getAttributeFromOutput(fq); + const auto precisions = precisionsAttribute == nullptr ? 
+ PrecisionsAttribute::defaultPrecisions : + precisionsAttribute->get()->sharedValue->precisions; + + const DataPrecision dataPrecision = getDataPrecision(fq, quantizationDetails, precisions); + auto tuple = NetworkHelper::decomposeFakeQuantize( fq, dataPrecision.precision, @@ -302,9 +308,16 @@ void WeightableLayerTransformation::decomposeFakeQuantizeForWeightsPath(const st outChannelsShapeIndex); std::shared_ptr fqOnWeights = std::get<0>(tuple); + // TODO: LPT: issue #58685 + if ((!updatePrecisions) && (fqOnWeights == nullptr)) { + return false; + } + if (as_type_ptr(fqOnWeights) == nullptr) { THROW_IE_LPT_EXCEPTION(*fqOnWeights) << "FakeQuantize on weights was not folded to constant"; } + + return true; } bool WeightableLayerTransformation::isGroup(const std::shared_ptr& layer) { @@ -327,7 +340,7 @@ bool WeightableLayerTransformation::isDepthwise(const std::shared_ptr& lay return (group == inputChannelsCount) && (inputChannelsCount == outputChannelsCount); } -std::shared_ptr WeightableLayerTransformation::getFakeQuantizeOnWeights(const std::shared_ptr& node) const { +std::shared_ptr WeightableLayerTransformation::getFakeQuantizeOnWeights(const std::shared_ptr& node) { auto fq = as_type_ptr(node->input_value(1).get_node_shared_ptr()); // TODO: temporary workaround if (fq == nullptr) { @@ -337,10 +350,38 @@ std::shared_ptr WeightableLayerTransformation::getFakeQuan return fq; } -DataPrecision WeightableLayerTransformation::getDataPrecisionOnWeights(const std::shared_ptr& node) const { +DataPrecision WeightableLayerTransformation::getDataPrecisionOnWeights(const std::shared_ptr& node) { const auto fq = getFakeQuantizeOnWeights(node); const QuantizationDetails quantizationDetails = QuantizationDetails::getDetails(fq); - return getDataPrecision(fq, quantizationDetails, true); + + const auto precisionsAttribute = getAttributeFromOutput(fq); + const auto precisions = precisionsAttribute == nullptr ? + PrecisionsAttribute::defaultPrecisions : + precisionsAttribute->get()->sharedValue->precisions; + + return getDataPrecision(fq, quantizationDetails, precisions); +} + +bool WeightableLayerTransformation::isAsymmetricOnWeights(const std::shared_ptr& node) { + const auto n = const_cast(node.get())->shared_from_this(); + + const auto reshapeFromWeights = ngraph::as_type_ptr(n->get_input_node_shared_ptr(1)); + const auto dequantization = reshapeFromWeights == nullptr ? + NetworkHelper::getDequantization(n, 1ul) : + NetworkHelper::getDequantization(reshapeFromWeights); + + if (dequantization.empty()) { + const auto dataPrecision = WeightableLayerTransformation::getDataPrecisionOnWeights(n); + if (dataPrecision.hasZeroPoint) { + return true; + } + } else { + if (dequantization.subtract != nullptr) { + return true; + } + } + + return false; } } // namespace low_precision diff --git a/inference-engine/src/mkldnn_plugin/CMakeLists.txt b/inference-engine/src/mkldnn_plugin/CMakeLists.txt index f2bbc52bdc56bb..6e066a4656c384 100644 --- a/inference-engine/src/mkldnn_plugin/CMakeLists.txt +++ b/inference-engine/src/mkldnn_plugin/CMakeLists.txt @@ -34,8 +34,7 @@ ie_mark_target_as_cc(${TARGET_NAME}) if(SELECTIVE_BUILD STREQUAL "ON") # After disabling a block of code, some variables might be unused. 
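
The new static helpers above (isQuantizedStatic, getDataPrecisionOnWeights, isAsymmetricOnWeights) can be queried without instantiating a transformation, which is exactly what plugin-side pass-config callbacks need. A sketch of that wiring, assuming the callback is registered for the ConvolutionBackpropData transformation (the asymmetric checks suggest so) and that const_node_ptr aliases a shared_ptr to a const node, as in the CPU plugin hunk further below:

    using const_node_ptr = const std::shared_ptr<const ngraph::Node>;

    // Returning true from a pass-config callback disables the transformation for
    // that node: backprop-data convolutions are skipped whenever either the data
    // path or the weights carry a non-zero zero point.
    lptManager.get_pass_config()->set_callback<ConvolutionBackpropDataTransformation>(
        [](const_node_ptr& node) -> bool {
            return LayerTransformation::isAsymmetricQuantization(node) ||
                   WeightableLayerTransformation::isAsymmetricOnWeights(node);
        });
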
- if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR - CMAKE_CXX_COMPILER_ID MATCHES "^(Apple)?Clang$") + if(CMAKE_COMPILER_IS_GNUCXX OR OV_COMPILER_IS_CLANG) target_compile_options(${TARGET_NAME} PRIVATE -Wno-unused-variable) endif() endif() diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp index 9d10ad036da759..4e3fba2d2b1c8b 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp @@ -45,7 +45,7 @@ #include #include #include -#include +#include /***************************************************** * Debug capability @@ -94,7 +94,7 @@ void MKLDNNGraph::Replicate(const std::shared_ptr &subgr this->reuse_io_tensors = false; isQuantizedFlag = (config.lpTransformsMode == Config::On) && - ngraph::pass::low_precision::LowPrecisionTransformer::isFunctionQuantized(subgraph); + ngraph::pass::low_precision::LowPrecision::isFunctionQuantized(subgraph); // Map data object onto producer node std::map, std::pair> op2node; @@ -192,7 +192,7 @@ void MKLDNNGraph::Replicate(const CNNNetwork &network, const MKLDNNExtensionMana } isQuantizedFlag = (config.lpTransformsMode == Config::On) && - ngraph::pass::low_precision::LowPrecisionTransformer::isFunctionQuantized(func); + ngraph::pass::low_precision::LowPrecision::isFunctionQuantized(func); auto orderedOps = func->get_ordered_ops(); diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp index 35dd779d6f11d9..59a29ebf40ae8f 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp @@ -26,6 +26,7 @@ #include "transformations/common_optimizations/convert_quantize_dequantize.hpp" #include #include +#include #include #include #include @@ -74,13 +75,12 @@ #include #include -#include -#include -#include +#include #include #include #include -#include +#include +#include #include #include @@ -88,6 +88,7 @@ #include "nodes/mkldnn_mvn_node.h" #include "nodes/mkldnn_fake_quantize_node.h" +#include "nodes/mkldnn_normalize_node.h" #include "ngraph_transformations/convert_to_cpu_specific_opset.hpp" #if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64) @@ -121,7 +122,7 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) { const bool useLpt = (conf.lpTransformsMode == Config::LPTransformsMode::On) && - ngraph::pass::low_precision::LowPrecisionTransformer::isFunctionQuantized(nGraphFunc); + ngraph::pass::low_precision::LowPrecision::isFunctionQuantized(nGraphFunc); if (useLpt) { manager.register_pass( std::vector{ ngraph::element::i8, ngraph::element::u8, ngraph::element::i4, ngraph::element::u4 }); @@ -278,6 +279,13 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) { return node->input_value(0).get_partial_shape().rank().get_length() > 5; }); + auto normalizeL2FusionCallback = [](const_node_ptr &node) -> bool { + std::string errorMsg; + return !MKLDNNNormalizeL2Node::isSupportedOperation(node, errorMsg); + }; + pass_config->set_callback(normalizeL2FusionCallback); + pass_config->set_callback(normalizeL2FusionCallback); + // List of enabled/disabled transformations pass_config->disable(); pass_config->disable(); @@ -314,30 +322,45 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) { if (useLpt) { OV_ITT_SCOPE(FIRST_INFERENCE, MKLDNNPlugin::itt::domains::MKLDNN_LT, "LowPrecisionTransformations"); - 
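
The hunk below swaps the monolithic LowPrecisionTransformer configuration for a Manager-registered LowPrecision pass. Instead of chaining per-transformation Params setters, allowed precisions are stated declaratively per input port: each restriction maps an input-port index to the element types permitted on that port. A sketch of the shape of one restriction, assuming it targets ngraph::opset1::Convolution (port 0 carries activations, port 1 weights):

    auto convRestriction = OperationPrecisionRestriction::create<ngraph::opset1::Convolution>({
        {0, {ngraph::element::u8}},  // activations must arrive as u8
        {1, {ngraph::element::i8}},  // weights must arrive as i8
    });

    // Ports that additionally require per-tensor (not per-channel) quantization:
    auto convPerTensor =
        OperationPerTensorQuantizationRestriction::create<ngraph::opset1::Convolution>({0});
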
ngraph::pass::Manager manager;
-        auto lptPrerequisites = manager.register_pass();
-        const std::vector supportedTypes = { ngraph::element::i8, ngraph::element::u8 };
-        lptPrerequisites->add_matcher(supportedTypes);
-        lptPrerequisites->add_matcher(supportedTypes);
-        lptPrerequisites->add_matcher();
-        manager.run_passes(nGraphFunc);
-
-        auto params = LayerTransformation::Params(
-            true,  // updatePrecisions
-            LayerTransformation::QuantizedTensorAlignment::UpdateLevel,  // quantizedTensorAlignmentOnActivations
-            LayerTransformation::QuantizedTensorAlignment::None,  // quantizedTensorAlignmentOnWeights
-            true);  // supportAsymmetricQuantization
-        LowPrecisionTransformer transformer(LowPrecisionTransformer::getAllTransformations(params)
-            .add(
-                LayerTransformation::Params(params).setPrecisionsOnActivations({ngraph::element::u8}).setSupportAsymmetricQuantization(true))
-            .add(
-                LayerTransformation::Params(params).setPrecisionsOnActivations({ ngraph::element::u8 }).setSupportAsymmetricQuantization(true))
-            .addStandaloneCleanup(
-                LayerTransformation::Params(params).setPrecisionsOnActivations({ ngraph::element::u8 }))
-            .add(
-                LayerTransformation::Params(params).setSupportAsymmetricQuantization(false)));
-
-        transformer.transform(nGraphFunc);
+        auto supportedPrecisions = std::vector({
+            OperationPrecisionRestriction::create({
+                {0, {ngraph::element::u8}},
+                {1, {ngraph::element::i8}},
+            }),
+            OperationPrecisionRestriction::create({
+                {0, {ngraph::element::u8, ngraph::element::i8}},
+                {1, {ngraph::element::i8}}
+            }),
+            OperationPrecisionRestriction::create({
+                {0, {ngraph::element::u8}},
+                {1, {ngraph::element::i8}}
+            }),
+            OperationPrecisionRestriction::create({
+                {0, {ngraph::element::u8}},
+                {1, {ngraph::element::i8}},
+            }),
+        });
+
+        auto perTensorQuantization = std::vector({
+            OperationPerTensorQuantizationRestriction::create({0}),
+            OperationPerTensorQuantizationRestriction::create({0})
+        });
+
+        ngraph::pass::Manager lptManager;
+        lptManager.register_pass(supportedPrecisions, perTensorQuantization);
+        lptManager.get_pass_config()->set_callback([](const_node_ptr& node) -> bool {
+            if (const auto mulitply = std::dynamic_pointer_cast(node)) {
+                return !MultiplyToGroupConvolutionTransformation::canBeTransformedToGroupConvolution(mulitply);
+            }
+            return false;
+        });
+        lptManager.get_pass_config()->set_callback([](const_node_ptr& node) -> bool {
+            return LayerTransformation::isAsymmetricQuantization(node) || WeightableLayerTransformation::isAsymmetricOnWeights(node);
+        });
+        lptManager.get_pass_config()->set_callback([](const_node_ptr& node) -> bool {
+            return MultiplyToGroupConvolutionTransformation::isDynamicOrScalar(node);
+        });
+        lptManager.run_passes(nGraphFunc);
     }
 
     ngraph::pass::Manager postLPTPassManager;
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.cpp
index ff95f416573a25..2da3ae8f330064 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.cpp
@@ -660,7 +660,7 @@ MKLDNNNormalizeL2Node::MKLDNNNormalizeL2Node(const std::shared_ptr
 }
 }
 
-bool MKLDNNNormalizeL2Node::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept {
+bool MKLDNNNormalizeL2Node::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept {
     try {
         const auto norm = std::dynamic_pointer_cast(op);
         if (!norm) {
diff --git
a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.h index bcb7b0d8d491f2..6b6a62bf42c418 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.h @@ -84,7 +84,7 @@ class MKLDNNNormalizeL2Node : public MKLDNNNode { return false; } - static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; bool canFuse(const MKLDNNNodePtr& node) const override; private: diff --git a/inference-engine/src/offline_transformations/src/moc_transformations.cpp b/inference-engine/src/offline_transformations/src/moc_transformations.cpp index 0b7d66f3743080..71e877bca3e843 100644 --- a/inference-engine/src/offline_transformations/src/moc_transformations.cpp +++ b/inference-engine/src/offline_transformations/src/moc_transformations.cpp @@ -18,6 +18,13 @@ #include #include #include +#include +#include +#include +#include +#include +#include +#include NGRAPH_RTTI_DEFINITION(ngraph::pass::MOCTransformations, "MOCTransformations", 0); @@ -38,16 +45,27 @@ bool ngraph::pass::MOCTransformations::run_on_function(std::shared_ptr(); manager.register_pass(); + auto eliminations = manager.register_pass(); + eliminations->add_matcher(); + eliminations->set_name("ngraph::pass::CommonEliminations"); + auto common_fusions = manager.register_pass(); + common_fusions->add_matcher(); common_fusions->add_matcher(); common_fusions->add_matcher(); common_fusions->add_matcher(); common_fusions->add_matcher(); common_fusions->add_matcher(); + common_fusions->add_matcher(); common_fusions->add_matcher(); + common_fusions->add_matcher(); + common_fusions->add_matcher(); common_fusions->add_matcher(); common_fusions->set_name("ngraph::pass::CommonFusions"); + manager.register_pass(); + manager.register_pass(); + manager.run_passes(f); // Restore original shapes to the nGraph Function diff --git a/inference-engine/src/preprocessing/ie_preprocess_gapi_kernels.hpp b/inference-engine/src/preprocessing/ie_preprocess_gapi_kernels.hpp index ffefeed06f0c2b..3af55071aa9c89 100644 --- a/inference-engine/src/preprocessing/ie_preprocess_gapi_kernels.hpp +++ b/inference-engine/src/preprocessing/ie_preprocess_gapi_kernels.hpp @@ -349,7 +349,7 @@ template - const int operator()(type_to_type) { return cv_type_to_depth::depth; } + int operator()(type_to_type) { return cv_type_to_depth::depth; } }; } // namespace diff --git a/inference-engine/src/transformations/src/transformations/common_optimizations/normalize_l2_fusion.cpp b/inference-engine/src/transformations/src/transformations/common_optimizations/normalize_l2_fusion.cpp index 905356b4d5fd7a..22aac2e1c71d33 100644 --- a/inference-engine/src/transformations/src/transformations/common_optimizations/normalize_l2_fusion.cpp +++ b/inference-engine/src/transformations/src/transformations/common_optimizations/normalize_l2_fusion.cpp @@ -25,10 +25,10 @@ ngraph::pass::NormalizeL2FusionWithMax::NormalizeL2FusionWithMax() { auto pow = std::make_shared(input, exp); auto axes = ngraph::pattern::wrap_type(); auto reduce_sum = std::make_shared(pow, axes); - auto sqrt = std::make_shared(reduce_sum); auto eps_const = ngraph::pattern::wrap_type(); - auto sqrt_max_eps = std::make_shared(sqrt, eps_const); - auto divide = std::make_shared(input, sqrt_max_eps); + auto max = std::make_shared(reduce_sum, eps_const); + 
auto sqrt = std::make_shared(max);
+    auto divide = std::make_shared(input, sqrt);
 
     ngraph::matcher_pass_callback matcher_pass_callback = [=](ngraph::pattern::Matcher& m) {
         auto& pattern_to_output = m.get_pattern_value_map();
@@ -52,12 +52,14 @@ ngraph::pass::NormalizeL2FusionWithMax::NormalizeL2FusionWithMax() {
         const auto eps_attr_value = eps_attr->cast_vector()[0];
 
         auto normalize_l2 = std::make_shared(data_input, axes_input, eps_attr_value, op::EpsMode::MAX);
+        if (transformation_callback(normalize_l2))
+            return false;
         normalize_l2->set_friendly_name(m.get_match_root()->get_friendly_name());
         ngraph::copy_runtime_info({pattern_to_output.at(pow).get_node_shared_ptr(),
                                    pattern_to_output.at(reduce_sum).get_node_shared_ptr(),
                                    pattern_to_output.at(sqrt).get_node_shared_ptr(),
-                                   pattern_to_output.at(sqrt_max_eps).get_node_shared_ptr(),
+                                   pattern_to_output.at(max).get_node_shared_ptr(),
                                    pattern_to_output.at(divide).get_node_shared_ptr()
                                    },
                                   normalize_l2);
@@ -79,10 +81,10 @@ ngraph::pass::NormalizeL2FusionWithAdd::NormalizeL2FusionWithAdd() {
     auto pow = std::make_shared(input, exp);
     auto axes = ngraph::pattern::wrap_type();
     auto reduce_sum = std::make_shared(pow, axes);
-    auto sqrt = std::make_shared(reduce_sum);
     auto eps_const = ngraph::pattern::wrap_type();
-    auto sqrt_add_eps = std::make_shared(sqrt, eps_const);
-    auto divide = std::make_shared(input, sqrt_add_eps);
+    auto add = std::make_shared(reduce_sum, eps_const);
+    auto sqrt = std::make_shared(add);
+    auto divide = std::make_shared(input, sqrt);
 
     ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) {
         auto& pattern_to_output = m.get_pattern_value_map();
@@ -106,12 +108,14 @@ ngraph::pass::NormalizeL2FusionWithAdd::NormalizeL2FusionWithAdd() {
         const auto eps_attr_value = op::util::has_constant_value(exp_input, 2.0f);
 
         auto normalize_l2 = std::make_shared(data_input, axes_input, eps_attr_value, op::EpsMode::ADD);
+        if (transformation_callback(normalize_l2))
+            return false;
         normalize_l2->set_friendly_name(m.get_match_root()->get_friendly_name());
         ngraph::copy_runtime_info({pattern_to_output.at(pow).get_node_shared_ptr(),
                                    pattern_to_output.at(reduce_sum).get_node_shared_ptr(),
                                    pattern_to_output.at(sqrt).get_node_shared_ptr(),
-                                   pattern_to_output.at(sqrt_add_eps).get_node_shared_ptr(),
+                                   pattern_to_output.at(add).get_node_shared_ptr(),
                                    pattern_to_output.at(divide).get_node_shared_ptr()
                                    },
                                   normalize_l2);
diff --git a/inference-engine/src/transformations/src/transformations/common_optimizations/transpose_sinking.cpp b/inference-engine/src/transformations/src/transformations/common_optimizations/transpose_sinking.cpp
index 9a52445bf76f02..21211a7be462cb 100644
--- a/inference-engine/src/transformations/src/transformations/common_optimizations/transpose_sinking.cpp
+++ b/inference-engine/src/transformations/src/transformations/common_optimizations/transpose_sinking.cpp
@@ -212,8 +212,9 @@ ngraph::pass::TransposeFuse::TransposeFuse() {
             auto new_order = ngraph::opset7::Constant::create(element::i64, {order2.size()}, order2);
             auto new_transpose = register_new_node(input, new_order);
 
+            new_transpose->set_friendly_name(m.get_match_root()->get_friendly_name());
             ngraph::copy_runtime_info({ transpose1, transpose2 }, new_transpose);
-            ngraph::replace_node(transpose2, new_transpose);
+            ngraph::replace_node(m.get_match_root(), new_transpose);
         }
 
         return true;
diff --git a/inference-engine/src/transformations/src/transformations/serialize.cpp b/inference-engine/src/transformations/src/transformations/serialize.cpp
index
93f9c24e4b81bb..68214a5c543a97 100644 --- a/inference-engine/src/transformations/src/transformations/serialize.cpp +++ b/inference-engine/src/transformations/src/transformations/serialize.cpp @@ -851,7 +851,7 @@ bool pass::Serialize::run_on_function(std::shared_ptr f) { try { serializeFunc(xml_file, bin_file); - } catch (const ngraph::CheckFailure& e) { + } catch (const ngraph::CheckFailure&) { // optimization decission was made to create .bin file upfront and // write to it directly instead of buffering its content in memory, // hence we need to delete it here in case of failure diff --git a/inference-engine/src/vpu/CMakeLists.txt b/inference-engine/src/vpu/CMakeLists.txt index 959ad02186c000..3a11a33509736c 100644 --- a/inference-engine/src/vpu/CMakeLists.txt +++ b/inference-engine/src/vpu/CMakeLists.txt @@ -20,6 +20,9 @@ if(ENABLE_MYRIAD) install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/custom_kernels/ DESTINATION ${IE_CPACK_LIBRARY_PATH}/vpu_custom_kernels COMPONENT myriad) + install(DIRECTORY ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/vpu_custom_kernels/ + DESTINATION ${IE_CPACK_LIBRARY_PATH}/vpu_custom_kernels + COMPONENT myriad) install(DIRECTORY ${VPU_CLC_MA2X8X_ROOT}/ DESTINATION deployment_tools/tools/cl_compiler COMPONENT myriad diff --git a/inference-engine/src/vpu/common/CMakeLists.txt b/inference-engine/src/vpu/common/CMakeLists.txt index 71c727b631ab0f..d8b55be48257e8 100644 --- a/inference-engine/src/vpu/common/CMakeLists.txt +++ b/inference-engine/src/vpu/common/CMakeLists.txt @@ -15,7 +15,7 @@ function(add_common_target TARGET_NAME STATIC_IE) UNITY ) - if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + if(CMAKE_COMPILER_IS_GNUCXX) # TODO: enable some day and fix all warnings # target_compile_options(${TARGET_NAME} PRIVATE "-Wall") target_compile_options(${TARGET_NAME} PRIVATE "-Werror=unused-function") diff --git a/inference-engine/src/vpu/common/include/vpu/utils/containers.hpp b/inference-engine/src/vpu/common/include/vpu/utils/containers.hpp index 745613c977ece8..ada40a74d84498 100644 --- a/inference-engine/src/vpu/common/include/vpu/utils/containers.hpp +++ b/inference-engine/src/vpu/common/include/vpu/utils/containers.hpp @@ -11,24 +11,33 @@ namespace vpu { -template class Map> -inline std::vector getKeys(const Map& map) { +template